{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-01 04:15:33.485620', 'step': 0, 'epoch': 0} {'type': 'pplx', 'content': 21944.183071258598, 'timestamp': '2025-10-01 04:15:33.488709', 'step': 0, 'epoch': 0} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:33.582288', 'step': 0, 'epoch': 1} {'type': 'loss', 'content': 0.9957584738731384, 'timestamp': '2025-10-01 04:15:33.594260', 'step': 1, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:33.658722', 'step': 1, 'epoch': 1} {'type': 'loss', 'content': 0.9864388704299927, 'timestamp': '2025-10-01 04:15:33.660487', 'step': 2, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:33.724830', 'step': 2, 'epoch': 1} {'type': 'loss', 'content': 0.9595818519592285, 'timestamp': '2025-10-01 04:15:33.726665', 'step': 3, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:33.786535', 'step': 3, 'epoch': 1} {'type': 'loss', 'content': 0.9695097208023071, 'timestamp': '2025-10-01 04:15:33.829843', 'step': 4, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:33.892747', 'step': 4, 'epoch': 1} {'type': 'loss', 'content': 0.859954833984375, 'timestamp': '2025-10-01 04:15:33.894731', 'step': 5, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:33.950340', 'step': 5, 'epoch': 1} {'type': 'loss', 'content': 0.6243321895599365, 'timestamp': '2025-10-01 04:15:33.952361', 'step': 6, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:34.005062', 'step': 6, 'epoch': 1} {'type': 'loss', 'content': 0.6934952735900879, 'timestamp': '2025-10-01 04:15:34.006918', 'step': 7, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:34.059826', 'step': 7, 'epoch': 1} {'type': 'loss', 'content': 0.8247389793395996, 'timestamp': '2025-10-01 04:15:34.065715', 'step': 8, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:34.119330', 'step': 8, 'epoch': 1} {'type': 'loss', 'content': 0.46895191073417664, 'timestamp': '2025-10-01 04:15:34.133017', 'step': 9, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:34.185767', 'step': 9, 'epoch': 1} {'type': 'loss', 'content': 0.5497643947601318, 'timestamp': '2025-10-01 04:15:34.187928', 'step': 10, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:34.240521', 'step': 10, 'epoch': 1} {'type': 'loss', 'content': 0.48457327485084534, 'timestamp': '2025-10-01 04:15:34.247946', 'step': 11, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:34.305975', 'step': 11, 'epoch': 1} {'type': 'loss', 'content': 0.5040982961654663, 'timestamp': '2025-10-01 04:15:34.317532', 'step': 12, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:34.371728', 'step': 12, 'epoch': 1} {'type': 'loss', 'content': 0.3165914714336395, 'timestamp': '2025-10-01 04:15:34.373309', 'step': 13, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:34.426378', 'step': 13, 'epoch': 1} {'type': 'loss', 'content': 0.3447871208190918, 'timestamp': '2025-10-01 04:15:34.428142', 'step': 14, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:34.480589', 'step': 14, 'epoch': 1} {'type': 'loss', 'content': 0.35542288422584534, 'timestamp': '2025-10-01 04:15:34.482717', 'step': 15, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:34.535032', 'step': 15, 'epoch': 1} {'type': 'loss', 'content': 0.39630192518234253, 'timestamp': '2025-10-01 04:15:34.540502', 'step': 16, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:34.592107', 'step': 16, 'epoch': 1} {'type': 'loss', 'content': 0.29788681864738464, 'timestamp': '2025-10-01 04:15:34.596822', 'step': 17, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:34.649969', 'step': 17, 'epoch': 1} {'type': 'loss', 'content': 0.33074599504470825, 'timestamp': '2025-10-01 04:15:34.652003', 'step': 18, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:15:34.729117', 'step': 18, 'epoch': 1} {'type': 'loss', 'content': 0.39447179436683655, 'timestamp': '2025-10-01 04:15:34.731112', 'step': 19, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:34.784868', 'step': 19, 'epoch': 1} {'type': 'loss', 'content': 0.30792951583862305, 'timestamp': '2025-10-01 04:15:34.790335', 'step': 20, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:34.842307', 'step': 20, 'epoch': 1} {'type': 'loss', 'content': 0.46326297521591187, 'timestamp': '2025-10-01 04:15:34.844454', 'step': 21, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:34.896939', 'step': 21, 'epoch': 1} {'type': 'loss', 'content': 0.32916322350502014, 'timestamp': '2025-10-01 04:15:34.898883', 'step': 22, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:34.951758', 'step': 22, 'epoch': 1} {'type': 'loss', 'content': 0.2508290410041809, 'timestamp': '2025-10-01 04:15:34.953845', 'step': 23, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:35.019313', 'step': 23, 'epoch': 1} {'type': 'loss', 'content': 0.36722227931022644, 'timestamp': '2025-10-01 04:15:35.024978', 'step': 24, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:35.077277', 'step': 24, 'epoch': 1} {'type': 'loss', 'content': 0.26500892639160156, 'timestamp': '2025-10-01 04:15:35.079344', 'step': 25, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:15:35.140403', 'step': 25, 'epoch': 1} {'type': 'loss', 'content': 0.29172196984291077, 'timestamp': '2025-10-01 04:15:35.142352', 'step': 26, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:35.195724', 'step': 26, 'epoch': 1} {'type': 'loss', 'content': 0.3991400897502899, 'timestamp': '2025-10-01 04:15:35.197937', 'step': 27, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:35.250786', 'step': 27, 'epoch': 1} {'type': 'loss', 'content': 0.3445204496383667, 'timestamp': '2025-10-01 04:15:35.257876', 'step': 28, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:35.310151', 'step': 28, 'epoch': 1} {'type': 'loss', 'content': 0.18278564512729645, 'timestamp': '2025-10-01 04:15:35.311777', 'step': 29, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:35.364462', 'step': 29, 'epoch': 1} {'type': 'loss', 'content': 0.15365855395793915, 'timestamp': '2025-10-01 04:15:35.366282', 'step': 30, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:35.419722', 'step': 30, 'epoch': 1} {'type': 'loss', 'content': 0.2685059607028961, 'timestamp': '2025-10-01 04:15:35.421931', 'step': 31, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:35.476536', 'step': 31, 'epoch': 1} {'type': 'loss', 'content': 0.2434316724538803, 'timestamp': '2025-10-01 04:15:35.482483', 'step': 32, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:35.535166', 'step': 32, 'epoch': 1} {'type': 'loss', 'content': 0.3007912039756775, 'timestamp': '2025-10-01 04:15:35.537522', 'step': 33, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:35.591435', 'step': 33, 'epoch': 1} {'type': 'loss', 'content': 0.35583457350730896, 'timestamp': '2025-10-01 04:15:35.593385', 'step': 34, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:35.646304', 'step': 34, 'epoch': 1} {'type': 'loss', 'content': 0.3443217873573303, 'timestamp': '2025-10-01 04:15:35.648573', 'step': 35, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:35.704407', 'step': 35, 'epoch': 1} {'type': 'loss', 'content': 0.24711312353610992, 'timestamp': '2025-10-01 04:15:35.710016', 'step': 36, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:35.762199', 'step': 36, 'epoch': 1} {'type': 'loss', 'content': 0.34165817499160767, 'timestamp': '2025-10-01 04:15:35.763931', 'step': 37, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:35.820582', 'step': 37, 'epoch': 1} {'type': 'loss', 'content': 0.23560629785060883, 'timestamp': '2025-10-01 04:15:35.823164', 'step': 38, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:35.877805', 'step': 38, 'epoch': 1} {'type': 'loss', 'content': 0.35849884152412415, 'timestamp': '2025-10-01 04:15:35.880386', 'step': 39, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:35.934888', 'step': 39, 'epoch': 1} {'type': 'loss', 'content': 0.2861058712005615, 'timestamp': '2025-10-01 04:15:35.940777', 'step': 40, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:35.994023', 'step': 40, 'epoch': 1} {'type': 'loss', 'content': 0.20586401224136353, 'timestamp': '2025-10-01 04:15:35.996278', 'step': 41, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:36.058980', 'step': 41, 'epoch': 1} {'type': 'loss', 'content': 0.24739445745944977, 'timestamp': '2025-10-01 04:15:36.061572', 'step': 42, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:36.114791', 'step': 42, 'epoch': 1} {'type': 'loss', 'content': 0.20474421977996826, 'timestamp': '2025-10-01 04:15:36.117080', 'step': 43, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:36.170370', 'step': 43, 'epoch': 1} {'type': 'loss', 'content': 0.31253013014793396, 'timestamp': '2025-10-01 04:15:36.175820', 'step': 44, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:36.229075', 'step': 44, 'epoch': 1} {'type': 'loss', 'content': 0.2719212770462036, 'timestamp': '2025-10-01 04:15:36.231594', 'step': 45, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:36.284743', 'step': 45, 'epoch': 1} {'type': 'loss', 'content': 0.2645176649093628, 'timestamp': '2025-10-01 04:15:36.286791', 'step': 46, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:15:36.340802', 'step': 46, 'epoch': 1} {'type': 'loss', 'content': 0.23961660265922546, 'timestamp': '2025-10-01 04:15:36.343201', 'step': 47, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:36.396237', 'step': 47, 'epoch': 1} {'type': 'loss', 'content': 0.30784323811531067, 'timestamp': '2025-10-01 04:15:36.404463', 'step': 48, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:36.457294', 'step': 48, 'epoch': 1} {'type': 'loss', 'content': 0.20760907232761383, 'timestamp': '2025-10-01 04:15:36.459811', 'step': 49, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:36.513766', 'step': 49, 'epoch': 1} {'type': 'loss', 'content': 0.2127155363559723, 'timestamp': '2025-10-01 04:15:36.516131', 'step': 50, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:36.569186', 'step': 50, 'epoch': 1} {'type': 'loss', 'content': 0.2595609724521637, 'timestamp': '2025-10-01 04:15:36.571245', 'step': 51, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:36.634895', 'step': 51, 'epoch': 1} {'type': 'loss', 'content': 0.3592156171798706, 'timestamp': '2025-10-01 04:15:36.640522', 'step': 52, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:36.704199', 'step': 52, 'epoch': 1} {'type': 'loss', 'content': 0.23540565371513367, 'timestamp': '2025-10-01 04:15:36.706661', 'step': 53, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:15:36.760211', 'step': 53, 'epoch': 1} {'type': 'loss', 'content': 0.21864400804042816, 'timestamp': '2025-10-01 04:15:36.763404', 'step': 54, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:15:36.819155', 'step': 54, 'epoch': 1} {'type': 'loss', 'content': 0.2739979922771454, 'timestamp': '2025-10-01 04:15:36.821382', 'step': 55, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:15:36.890633', 'step': 55, 'epoch': 1} {'type': 'loss', 'content': 0.15788410604000092, 'timestamp': '2025-10-01 04:15:36.896315', 'step': 56, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:36.948427', 'step': 56, 'epoch': 1} {'type': 'loss', 'content': 0.16920799016952515, 'timestamp': '2025-10-01 04:15:36.950414', 'step': 57, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:37.002987', 'step': 57, 'epoch': 1} {'type': 'loss', 'content': 0.22730357944965363, 'timestamp': '2025-10-01 04:15:37.004813', 'step': 58, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:37.057997', 'step': 58, 'epoch': 1} {'type': 'loss', 'content': 0.2270258367061615, 'timestamp': '2025-10-01 04:15:37.059906', 'step': 59, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:15:37.113212', 'step': 59, 'epoch': 1} {'type': 'loss', 'content': 0.1848832666873932, 'timestamp': '2025-10-01 04:15:37.119132', 'step': 60, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:37.170814', 'step': 60, 'epoch': 1} {'type': 'loss', 'content': 0.1914934664964676, 'timestamp': '2025-10-01 04:15:37.172796', 'step': 61, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:37.225366', 'step': 61, 'epoch': 1} {'type': 'loss', 'content': 0.16856025159358978, 'timestamp': '2025-10-01 04:15:37.227175', 'step': 62, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:37.279802', 'step': 62, 'epoch': 1} {'type': 'loss', 'content': 0.2260177731513977, 'timestamp': '2025-10-01 04:15:37.282602', 'step': 63, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:37.337617', 'step': 63, 'epoch': 1} {'type': 'loss', 'content': 0.2906312048435211, 'timestamp': '2025-10-01 04:15:37.343694', 'step': 64, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:37.395846', 'step': 64, 'epoch': 1} {'type': 'loss', 'content': 0.18130168318748474, 'timestamp': '2025-10-01 04:15:37.398218', 'step': 65, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:15:37.452107', 'step': 65, 'epoch': 1} {'type': 'loss', 'content': 0.23978762328624725, 'timestamp': '2025-10-01 04:15:37.454059', 'step': 66, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:15:37.508880', 'step': 66, 'epoch': 1} {'type': 'loss', 'content': 0.29432591795921326, 'timestamp': '2025-10-01 04:15:37.511236', 'step': 67, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:37.563641', 'step': 67, 'epoch': 1} {'type': 'loss', 'content': 0.22621358931064606, 'timestamp': '2025-10-01 04:15:37.568709', 'step': 68, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:37.621028', 'step': 68, 'epoch': 1} {'type': 'loss', 'content': 0.22982224822044373, 'timestamp': '2025-10-01 04:15:37.622931', 'step': 69, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:37.681908', 'step': 69, 'epoch': 1} {'type': 'loss', 'content': 0.2515338063240051, 'timestamp': '2025-10-01 04:15:37.683692', 'step': 70, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:37.735924', 'step': 70, 'epoch': 1} {'type': 'loss', 'content': 0.17957286536693573, 'timestamp': '2025-10-01 04:15:37.737927', 'step': 71, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:37.790886', 'step': 71, 'epoch': 1} {'type': 'loss', 'content': 0.2471006214618683, 'timestamp': '2025-10-01 04:15:37.796606', 'step': 72, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:37.849271', 'step': 72, 'epoch': 1} {'type': 'loss', 'content': 0.20029425621032715, 'timestamp': '2025-10-01 04:15:37.851485', 'step': 73, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:37.907640', 'step': 73, 'epoch': 1} {'type': 'loss', 'content': 0.23620398342609406, 'timestamp': '2025-10-01 04:15:37.909671', 'step': 74, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:37.962693', 'step': 74, 'epoch': 1} {'type': 'loss', 'content': 0.21076801419258118, 'timestamp': '2025-10-01 04:15:37.964507', 'step': 75, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:38.017534', 'step': 75, 'epoch': 1} {'type': 'loss', 'content': 0.3540920615196228, 'timestamp': '2025-10-01 04:15:38.023476', 'step': 76, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:38.075452', 'step': 76, 'epoch': 1} {'type': 'loss', 'content': 0.15917646884918213, 'timestamp': '2025-10-01 04:15:38.077520', 'step': 77, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:38.130380', 'step': 77, 'epoch': 1} {'type': 'loss', 'content': 0.24457761645317078, 'timestamp': '2025-10-01 04:15:38.132447', 'step': 78, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:38.186436', 'step': 78, 'epoch': 1} {'type': 'loss', 'content': 0.18149685859680176, 'timestamp': '2025-10-01 04:15:38.188801', 'step': 79, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:38.243508', 'step': 79, 'epoch': 1} {'type': 'loss', 'content': 0.25645026564598083, 'timestamp': '2025-10-01 04:15:38.249889', 'step': 80, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:15:38.302098', 'step': 80, 'epoch': 1} {'type': 'loss', 'content': 0.26605021953582764, 'timestamp': '2025-10-01 04:15:38.304432', 'step': 81, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:38.357166', 'step': 81, 'epoch': 1} {'type': 'loss', 'content': 0.19841280579566956, 'timestamp': '2025-10-01 04:15:38.359654', 'step': 82, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:38.411914', 'step': 82, 'epoch': 1} {'type': 'loss', 'content': 0.15691930055618286, 'timestamp': '2025-10-01 04:15:38.414528', 'step': 83, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:38.469360', 'step': 83, 'epoch': 1} {'type': 'loss', 'content': 0.2787221074104309, 'timestamp': '2025-10-01 04:15:38.474346', 'step': 84, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:38.526541', 'step': 84, 'epoch': 1} {'type': 'loss', 'content': 0.1686808317899704, 'timestamp': '2025-10-01 04:15:38.528318', 'step': 85, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:38.580385', 'step': 85, 'epoch': 1} {'type': 'loss', 'content': 0.2048998326063156, 'timestamp': '2025-10-01 04:15:38.582388', 'step': 86, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:38.635237', 'step': 86, 'epoch': 1} {'type': 'loss', 'content': 0.21828621625900269, 'timestamp': '2025-10-01 04:15:38.637224', 'step': 87, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:38.689785', 'step': 87, 'epoch': 1} {'type': 'loss', 'content': 0.2338770180940628, 'timestamp': '2025-10-01 04:15:38.695154', 'step': 88, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:38.748783', 'step': 88, 'epoch': 1} {'type': 'loss', 'content': 0.17135381698608398, 'timestamp': '2025-10-01 04:15:38.750809', 'step': 89, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:38.803900', 'step': 89, 'epoch': 1} {'type': 'loss', 'content': 0.260998398065567, 'timestamp': '2025-10-01 04:15:38.806169', 'step': 90, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:38.858793', 'step': 90, 'epoch': 1} {'type': 'loss', 'content': 0.20011447370052338, 'timestamp': '2025-10-01 04:15:38.861108', 'step': 91, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:38.920248', 'step': 91, 'epoch': 1} {'type': 'loss', 'content': 0.18324977159500122, 'timestamp': '2025-10-01 04:15:38.925577', 'step': 92, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:38.977634', 'step': 92, 'epoch': 1} {'type': 'loss', 'content': 0.30098676681518555, 'timestamp': '2025-10-01 04:15:38.979205', 'step': 93, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:15:39.031528', 'step': 93, 'epoch': 1} {'type': 'loss', 'content': 0.2290862500667572, 'timestamp': '2025-10-01 04:15:39.033999', 'step': 94, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:39.086932', 'step': 94, 'epoch': 1} {'type': 'loss', 'content': 0.20065739750862122, 'timestamp': '2025-10-01 04:15:39.088741', 'step': 95, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:39.142136', 'step': 95, 'epoch': 1} {'type': 'loss', 'content': 0.21880486607551575, 'timestamp': '2025-10-01 04:15:39.152609', 'step': 96, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:39.205092', 'step': 96, 'epoch': 1} {'type': 'loss', 'content': 0.18282054364681244, 'timestamp': '2025-10-01 04:15:39.207186', 'step': 97, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:39.259774', 'step': 97, 'epoch': 1} {'type': 'loss', 'content': 0.21728427708148956, 'timestamp': '2025-10-01 04:15:39.261951', 'step': 98, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:39.315445', 'step': 98, 'epoch': 1} {'type': 'loss', 'content': 0.16183418035507202, 'timestamp': '2025-10-01 04:15:39.317452', 'step': 99, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:39.370177', 'step': 99, 'epoch': 1} {'type': 'loss', 'content': 0.23636838793754578, 'timestamp': '2025-10-01 04:15:39.375743', 'step': 100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:39.427635', 'step': 100, 'epoch': 1} {'type': 'loss', 'content': 0.24951133131980896, 'timestamp': '2025-10-01 04:15:39.429639', 'step': 101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:39.482322', 'step': 101, 'epoch': 1} {'type': 'loss', 'content': 0.23877392709255219, 'timestamp': '2025-10-01 04:15:39.484312', 'step': 102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:39.536800', 'step': 102, 'epoch': 1} {'type': 'loss', 'content': 0.3299163281917572, 'timestamp': '2025-10-01 04:15:39.538884', 'step': 103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:39.591952', 'step': 103, 'epoch': 1} {'type': 'loss', 'content': 0.2038746029138565, 'timestamp': '2025-10-01 04:15:39.597347', 'step': 104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:39.649644', 'step': 104, 'epoch': 1} {'type': 'loss', 'content': 0.22192884981632233, 'timestamp': '2025-10-01 04:15:39.651461', 'step': 105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:39.704206', 'step': 105, 'epoch': 1} {'type': 'loss', 'content': 0.26878103613853455, 'timestamp': '2025-10-01 04:15:39.706334', 'step': 106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:39.759553', 'step': 106, 'epoch': 1} {'type': 'loss', 'content': 0.3319077789783478, 'timestamp': '2025-10-01 04:15:39.761803', 'step': 107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:39.815616', 'step': 107, 'epoch': 1} {'type': 'loss', 'content': 0.34982725977897644, 'timestamp': '2025-10-01 04:15:39.821182', 'step': 108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:15:39.874127', 'step': 108, 'epoch': 1} {'type': 'loss', 'content': 0.18482764065265656, 'timestamp': '2025-10-01 04:15:39.875585', 'step': 109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:39.927976', 'step': 109, 'epoch': 1} {'type': 'loss', 'content': 0.2639634907245636, 'timestamp': '2025-10-01 04:15:39.929989', 'step': 110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:39.982541', 'step': 110, 'epoch': 1} {'type': 'loss', 'content': 0.1425142139196396, 'timestamp': '2025-10-01 04:15:39.984503', 'step': 111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:40.037270', 'step': 111, 'epoch': 1} {'type': 'loss', 'content': 0.1886337846517563, 'timestamp': '2025-10-01 04:15:40.042997', 'step': 112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:40.095175', 'step': 112, 'epoch': 1} {'type': 'loss', 'content': 0.2366170585155487, 'timestamp': '2025-10-01 04:15:40.100162', 'step': 113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:40.152667', 'step': 113, 'epoch': 1} {'type': 'loss', 'content': 0.3649568259716034, 'timestamp': '2025-10-01 04:15:40.154646', 'step': 114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:15:40.207350', 'step': 114, 'epoch': 1} {'type': 'loss', 'content': 0.17887240648269653, 'timestamp': '2025-10-01 04:15:40.209346', 'step': 115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:40.262542', 'step': 115, 'epoch': 1} {'type': 'loss', 'content': 0.2597115635871887, 'timestamp': '2025-10-01 04:15:40.268188', 'step': 116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:40.320039', 'step': 116, 'epoch': 1} {'type': 'loss', 'content': 0.19768933951854706, 'timestamp': '2025-10-01 04:15:40.322566', 'step': 117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:40.380680', 'step': 117, 'epoch': 1} {'type': 'loss', 'content': 0.21357861161231995, 'timestamp': '2025-10-01 04:15:40.382521', 'step': 118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:15:40.447166', 'step': 118, 'epoch': 1} {'type': 'loss', 'content': 0.2360459566116333, 'timestamp': '2025-10-01 04:15:40.449295', 'step': 119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:40.506008', 'step': 119, 'epoch': 1} {'type': 'loss', 'content': 0.2797749638557434, 'timestamp': '2025-10-01 04:15:40.512042', 'step': 120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:40.569320', 'step': 120, 'epoch': 1} {'type': 'loss', 'content': 0.23082374036312103, 'timestamp': '2025-10-01 04:15:40.570792', 'step': 121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:15:40.622920', 'step': 121, 'epoch': 1} {'type': 'loss', 'content': 0.1977701336145401, 'timestamp': '2025-10-01 04:15:40.627432', 'step': 122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:15:40.701993', 'step': 122, 'epoch': 1} {'type': 'loss', 'content': 0.3114495575428009, 'timestamp': '2025-10-01 04:15:40.704025', 'step': 123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:40.758102', 'step': 123, 'epoch': 1} {'type': 'loss', 'content': 0.20230333507061005, 'timestamp': '2025-10-01 04:15:40.764261', 'step': 124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:40.816819', 'step': 124, 'epoch': 1} {'type': 'loss', 'content': 0.2332182675600052, 'timestamp': '2025-10-01 04:15:40.818637', 'step': 125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:40.870854', 'step': 125, 'epoch': 1} {'type': 'loss', 'content': 0.2070736438035965, 'timestamp': '2025-10-01 04:15:40.874137', 'step': 126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:40.929460', 'step': 126, 'epoch': 1} {'type': 'loss', 'content': 0.2269219607114792, 'timestamp': '2025-10-01 04:15:40.945355', 'step': 127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:40.997783', 'step': 127, 'epoch': 1} {'type': 'loss', 'content': 0.41348302364349365, 'timestamp': '2025-10-01 04:15:41.003442', 'step': 128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:41.062476', 'step': 128, 'epoch': 1} {'type': 'loss', 'content': 0.2072644680738449, 'timestamp': '2025-10-01 04:15:41.066127', 'step': 129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:41.123135', 'step': 129, 'epoch': 1} {'type': 'loss', 'content': 0.1926860511302948, 'timestamp': '2025-10-01 04:15:41.125629', 'step': 130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:41.178603', 'step': 130, 'epoch': 1} {'type': 'loss', 'content': 0.16900333762168884, 'timestamp': '2025-10-01 04:15:41.180306', 'step': 131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:41.235413', 'step': 131, 'epoch': 1} {'type': 'loss', 'content': 0.20105043053627014, 'timestamp': '2025-10-01 04:15:41.240934', 'step': 132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:41.296718', 'step': 132, 'epoch': 1} {'type': 'loss', 'content': 0.24951201677322388, 'timestamp': '2025-10-01 04:15:41.299256', 'step': 133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:41.352099', 'step': 133, 'epoch': 1} {'type': 'loss', 'content': 0.286602258682251, 'timestamp': '2025-10-01 04:15:41.353938', 'step': 134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:41.406750', 'step': 134, 'epoch': 1} {'type': 'loss', 'content': 0.18069526553153992, 'timestamp': '2025-10-01 04:15:41.408928', 'step': 135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:41.463002', 'step': 135, 'epoch': 1} {'type': 'loss', 'content': 0.190597265958786, 'timestamp': '2025-10-01 04:15:41.468693', 'step': 136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:41.521559', 'step': 136, 'epoch': 1} {'type': 'loss', 'content': 0.26131364703178406, 'timestamp': '2025-10-01 04:15:41.523409', 'step': 137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:41.583541', 'step': 137, 'epoch': 1} {'type': 'loss', 'content': 0.20680662989616394, 'timestamp': '2025-10-01 04:15:41.589457', 'step': 138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:41.643632', 'step': 138, 'epoch': 1} {'type': 'loss', 'content': 0.2243179827928543, 'timestamp': '2025-10-01 04:15:41.646310', 'step': 139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:15:41.701083', 'step': 139, 'epoch': 1} {'type': 'loss', 'content': 0.2830616533756256, 'timestamp': '2025-10-01 04:15:41.706858', 'step': 140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:41.759203', 'step': 140, 'epoch': 1} {'type': 'loss', 'content': 0.22828736901283264, 'timestamp': '2025-10-01 04:15:41.763142', 'step': 141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:41.820495', 'step': 141, 'epoch': 1} {'type': 'loss', 'content': 0.18845339119434357, 'timestamp': '2025-10-01 04:15:41.822652', 'step': 142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:41.874591', 'step': 142, 'epoch': 1} {'type': 'loss', 'content': 0.2209016978740692, 'timestamp': '2025-10-01 04:15:41.877536', 'step': 143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:41.929914', 'step': 143, 'epoch': 1} {'type': 'loss', 'content': 0.2887532413005829, 'timestamp': '2025-10-01 04:15:41.935490', 'step': 144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:41.987579', 'step': 144, 'epoch': 1} {'type': 'loss', 'content': 0.20793958008289337, 'timestamp': '2025-10-01 04:15:41.989233', 'step': 145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:42.043120', 'step': 145, 'epoch': 1} {'type': 'loss', 'content': 0.2759399116039276, 'timestamp': '2025-10-01 04:15:42.044834', 'step': 146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:42.096919', 'step': 146, 'epoch': 1} {'type': 'loss', 'content': 0.2545355558395386, 'timestamp': '2025-10-01 04:15:42.098843', 'step': 147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:42.151193', 'step': 147, 'epoch': 1} {'type': 'loss', 'content': 0.22090870141983032, 'timestamp': '2025-10-01 04:15:42.156816', 'step': 148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:15:42.209008', 'step': 148, 'epoch': 1} {'type': 'loss', 'content': 0.1341852992773056, 'timestamp': '2025-10-01 04:15:42.211076', 'step': 149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:42.263751', 'step': 149, 'epoch': 1} {'type': 'loss', 'content': 0.2568507492542267, 'timestamp': '2025-10-01 04:15:42.265695', 'step': 150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:15:42.318010', 'step': 150, 'epoch': 1} {'type': 'loss', 'content': 0.16222567856311798, 'timestamp': '2025-10-01 04:15:42.320418', 'step': 151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:15:42.372812', 'step': 151, 'epoch': 1} {'type': 'loss', 'content': 0.1776992827653885, 'timestamp': '2025-10-01 04:15:42.378050', 'step': 152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:42.430553', 'step': 152, 'epoch': 1} {'type': 'loss', 'content': 0.27879950404167175, 'timestamp': '2025-10-01 04:15:42.432611', 'step': 153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:42.485608', 'step': 153, 'epoch': 1} {'type': 'loss', 'content': 0.2956344783306122, 'timestamp': '2025-10-01 04:15:42.487403', 'step': 154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:15:42.553385', 'step': 154, 'epoch': 1} {'type': 'loss', 'content': 0.3431094288825989, 'timestamp': '2025-10-01 04:15:42.555630', 'step': 155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:42.611868', 'step': 155, 'epoch': 1} {'type': 'loss', 'content': 0.23234885931015015, 'timestamp': '2025-10-01 04:15:42.618541', 'step': 156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:42.670865', 'step': 156, 'epoch': 1} {'type': 'loss', 'content': 0.1702612191438675, 'timestamp': '2025-10-01 04:15:42.672695', 'step': 157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:42.726198', 'step': 157, 'epoch': 1} {'type': 'loss', 'content': 0.2507626712322235, 'timestamp': '2025-10-01 04:15:42.739351', 'step': 158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:42.801606', 'step': 158, 'epoch': 1} {'type': 'loss', 'content': 0.1794973909854889, 'timestamp': '2025-10-01 04:15:42.803604', 'step': 159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:42.861624', 'step': 159, 'epoch': 1} {'type': 'loss', 'content': 0.2353724241256714, 'timestamp': '2025-10-01 04:15:42.867289', 'step': 160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:15:42.920913', 'step': 160, 'epoch': 1} {'type': 'loss', 'content': 0.2365693747997284, 'timestamp': '2025-10-01 04:15:42.922913', 'step': 161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:42.977935', 'step': 161, 'epoch': 1} {'type': 'loss', 'content': 0.34307724237442017, 'timestamp': '2025-10-01 04:15:42.979384', 'step': 162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:43.041642', 'step': 162, 'epoch': 1} {'type': 'loss', 'content': 0.20453345775604248, 'timestamp': '2025-10-01 04:15:43.049227', 'step': 163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:43.114043', 'step': 163, 'epoch': 1} {'type': 'loss', 'content': 0.17103834450244904, 'timestamp': '2025-10-01 04:15:43.119970', 'step': 164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:43.172083', 'step': 164, 'epoch': 1} {'type': 'loss', 'content': 0.2810572385787964, 'timestamp': '2025-10-01 04:15:43.174009', 'step': 165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:15:43.226999', 'step': 165, 'epoch': 1} {'type': 'loss', 'content': 0.16581936180591583, 'timestamp': '2025-10-01 04:15:43.229323', 'step': 166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:43.282782', 'step': 166, 'epoch': 1} {'type': 'loss', 'content': 0.13147376477718353, 'timestamp': '2025-10-01 04:15:43.285138', 'step': 167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:43.338143', 'step': 167, 'epoch': 1} {'type': 'loss', 'content': 0.2316911667585373, 'timestamp': '2025-10-01 04:15:43.343664', 'step': 168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:43.395843', 'step': 168, 'epoch': 1} {'type': 'loss', 'content': 0.14493155479431152, 'timestamp': '2025-10-01 04:15:43.397937', 'step': 169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:43.460904', 'step': 169, 'epoch': 1} {'type': 'loss', 'content': 0.18407317996025085, 'timestamp': '2025-10-01 04:15:43.463656', 'step': 170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:43.517007', 'step': 170, 'epoch': 1} {'type': 'loss', 'content': 0.178151935338974, 'timestamp': '2025-10-01 04:15:43.519022', 'step': 171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:43.572030', 'step': 171, 'epoch': 1} {'type': 'loss', 'content': 0.30646389722824097, 'timestamp': '2025-10-01 04:15:43.578032', 'step': 172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:43.630593', 'step': 172, 'epoch': 1} {'type': 'loss', 'content': 0.27621933817863464, 'timestamp': '2025-10-01 04:15:43.633102', 'step': 173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:43.691258', 'step': 173, 'epoch': 1} {'type': 'loss', 'content': 0.2484465390443802, 'timestamp': '2025-10-01 04:15:43.693417', 'step': 174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:43.746501', 'step': 174, 'epoch': 1} {'type': 'loss', 'content': 0.2741093635559082, 'timestamp': '2025-10-01 04:15:43.748667', 'step': 175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:43.809329', 'step': 175, 'epoch': 1} {'type': 'loss', 'content': 0.2698625922203064, 'timestamp': '2025-10-01 04:15:43.814822', 'step': 176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:43.866858', 'step': 176, 'epoch': 1} {'type': 'loss', 'content': 0.26880544424057007, 'timestamp': '2025-10-01 04:15:43.868789', 'step': 177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:43.927271', 'step': 177, 'epoch': 1} {'type': 'loss', 'content': 0.17092157900333405, 'timestamp': '2025-10-01 04:15:43.929524', 'step': 178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:43.983096', 'step': 178, 'epoch': 1} {'type': 'loss', 'content': 0.2299656867980957, 'timestamp': '2025-10-01 04:15:43.985389', 'step': 179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:44.039046', 'step': 179, 'epoch': 1} {'type': 'loss', 'content': 0.18379178643226624, 'timestamp': '2025-10-01 04:15:44.044539', 'step': 180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:15:44.111231', 'step': 180, 'epoch': 1} {'type': 'loss', 'content': 0.2560789883136749, 'timestamp': '2025-10-01 04:15:44.113454', 'step': 181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:44.166733', 'step': 181, 'epoch': 1} {'type': 'loss', 'content': 0.17233166098594666, 'timestamp': '2025-10-01 04:15:44.169497', 'step': 182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:44.222276', 'step': 182, 'epoch': 1} {'type': 'loss', 'content': 0.23182691633701324, 'timestamp': '2025-10-01 04:15:44.224718', 'step': 183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:44.277768', 'step': 183, 'epoch': 1} {'type': 'loss', 'content': 0.16568629443645477, 'timestamp': '2025-10-01 04:15:44.283478', 'step': 184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:15:44.336402', 'step': 184, 'epoch': 1} {'type': 'loss', 'content': 0.20148229598999023, 'timestamp': '2025-10-01 04:15:44.338848', 'step': 185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:44.391919', 'step': 185, 'epoch': 1} {'type': 'loss', 'content': 0.1372135877609253, 'timestamp': '2025-10-01 04:15:44.414247', 'step': 186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:44.466934', 'step': 186, 'epoch': 1} {'type': 'loss', 'content': 0.19638216495513916, 'timestamp': '2025-10-01 04:15:44.468907', 'step': 187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:44.522565', 'step': 187, 'epoch': 1} {'type': 'loss', 'content': 0.1974649727344513, 'timestamp': '2025-10-01 04:15:44.528073', 'step': 188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:15:44.580596', 'step': 188, 'epoch': 1} {'type': 'loss', 'content': 0.2481427937746048, 'timestamp': '2025-10-01 04:15:44.582358', 'step': 189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:15:44.634976', 'step': 189, 'epoch': 1} {'type': 'loss', 'content': 0.3306092321872711, 'timestamp': '2025-10-01 04:15:44.636957', 'step': 190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:44.690871', 'step': 190, 'epoch': 1} {'type': 'loss', 'content': 0.15122434496879578, 'timestamp': '2025-10-01 04:15:44.701179', 'step': 191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:44.760915', 'step': 191, 'epoch': 1} {'type': 'loss', 'content': 0.3449341356754303, 'timestamp': '2025-10-01 04:15:44.773822', 'step': 192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:44.828205', 'step': 192, 'epoch': 1} {'type': 'loss', 'content': 0.2372373640537262, 'timestamp': '2025-10-01 04:15:44.830153', 'step': 193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:44.889431', 'step': 193, 'epoch': 1} {'type': 'loss', 'content': 0.24787670373916626, 'timestamp': '2025-10-01 04:15:44.891168', 'step': 194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:15:44.951722', 'step': 194, 'epoch': 1} {'type': 'loss', 'content': 0.1949719339609146, 'timestamp': '2025-10-01 04:15:44.953545', 'step': 195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:45.005337', 'step': 195, 'epoch': 1} {'type': 'loss', 'content': 0.220179483294487, 'timestamp': '2025-10-01 04:15:45.010850', 'step': 196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:45.064420', 'step': 196, 'epoch': 1} {'type': 'loss', 'content': 0.20204506814479828, 'timestamp': '2025-10-01 04:15:45.066380', 'step': 197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:45.118874', 'step': 197, 'epoch': 1} {'type': 'loss', 'content': 0.3280376195907593, 'timestamp': '2025-10-01 04:15:45.120974', 'step': 198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:45.172990', 'step': 198, 'epoch': 1} {'type': 'loss', 'content': 0.15438063442707062, 'timestamp': '2025-10-01 04:15:45.175027', 'step': 199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:15:45.237351', 'step': 199, 'epoch': 1} {'type': 'loss', 'content': 0.1471916139125824, 'timestamp': '2025-10-01 04:15:45.242874', 'step': 200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:45.294832', 'step': 200, 'epoch': 1} {'type': 'loss', 'content': 0.3492889404296875, 'timestamp': '2025-10-01 04:15:45.296965', 'step': 201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:45.361963', 'step': 201, 'epoch': 1} {'type': 'loss', 'content': 0.1989872008562088, 'timestamp': '2025-10-01 04:15:45.364437', 'step': 202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:15:45.417020', 'step': 202, 'epoch': 1} {'type': 'loss', 'content': 0.1454041749238968, 'timestamp': '2025-10-01 04:15:45.426380', 'step': 203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:45.485790', 'step': 203, 'epoch': 1} {'type': 'loss', 'content': 0.25966978073120117, 'timestamp': '2025-10-01 04:15:45.491109', 'step': 204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:45.543672', 'step': 204, 'epoch': 1} {'type': 'loss', 'content': 0.34678250551223755, 'timestamp': '2025-10-01 04:15:45.548578', 'step': 205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:45.610285', 'step': 205, 'epoch': 1} {'type': 'loss', 'content': 0.1562858521938324, 'timestamp': '2025-10-01 04:15:45.617836', 'step': 206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:45.672004', 'step': 206, 'epoch': 1} {'type': 'loss', 'content': 0.2012602537870407, 'timestamp': '2025-10-01 04:15:45.673974', 'step': 207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:45.727469', 'step': 207, 'epoch': 1} {'type': 'loss', 'content': 0.1769435703754425, 'timestamp': '2025-10-01 04:15:45.732770', 'step': 208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:45.794809', 'step': 208, 'epoch': 1} {'type': 'loss', 'content': 0.2026776671409607, 'timestamp': '2025-10-01 04:15:45.796835', 'step': 209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:45.849162', 'step': 209, 'epoch': 1} {'type': 'loss', 'content': 0.3464266061782837, 'timestamp': '2025-10-01 04:15:45.851729', 'step': 210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:45.910433', 'step': 210, 'epoch': 1} {'type': 'loss', 'content': 0.15893346071243286, 'timestamp': '2025-10-01 04:15:45.912604', 'step': 211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:45.973454', 'step': 211, 'epoch': 1} {'type': 'loss', 'content': 0.2533086836338043, 'timestamp': '2025-10-01 04:15:45.978986', 'step': 212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:46.030798', 'step': 212, 'epoch': 1} {'type': 'loss', 'content': 0.1663459986448288, 'timestamp': '2025-10-01 04:15:46.033110', 'step': 213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:46.085683', 'step': 213, 'epoch': 1} {'type': 'loss', 'content': 0.2613593637943268, 'timestamp': '2025-10-01 04:15:46.087499', 'step': 214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:46.140254', 'step': 214, 'epoch': 1} {'type': 'loss', 'content': 0.17578339576721191, 'timestamp': '2025-10-01 04:15:46.142174', 'step': 215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:46.195361', 'step': 215, 'epoch': 1} {'type': 'loss', 'content': 0.3032434284687042, 'timestamp': '2025-10-01 04:15:46.200608', 'step': 216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:46.259712', 'step': 216, 'epoch': 1} {'type': 'loss', 'content': 0.2068294882774353, 'timestamp': '2025-10-01 04:15:46.270457', 'step': 217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:46.322648', 'step': 217, 'epoch': 1} {'type': 'loss', 'content': 0.2154686003923416, 'timestamp': '2025-10-01 04:15:46.324979', 'step': 218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:46.378160', 'step': 218, 'epoch': 1} {'type': 'loss', 'content': 0.32153165340423584, 'timestamp': '2025-10-01 04:15:46.380180', 'step': 219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:46.444763', 'step': 219, 'epoch': 1} {'type': 'loss', 'content': 0.2674624025821686, 'timestamp': '2025-10-01 04:15:46.450317', 'step': 220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:46.503171', 'step': 220, 'epoch': 1} {'type': 'loss', 'content': 0.2144237607717514, 'timestamp': '2025-10-01 04:15:46.506256', 'step': 221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:46.558903', 'step': 221, 'epoch': 1} {'type': 'loss', 'content': 0.12933088839054108, 'timestamp': '2025-10-01 04:15:46.560938', 'step': 222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:46.616251', 'step': 222, 'epoch': 1} {'type': 'loss', 'content': 0.2337339073419571, 'timestamp': '2025-10-01 04:15:46.618280', 'step': 223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:46.670754', 'step': 223, 'epoch': 1} {'type': 'loss', 'content': 0.205263152718544, 'timestamp': '2025-10-01 04:15:46.676470', 'step': 224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:46.728475', 'step': 224, 'epoch': 1} {'type': 'loss', 'content': 0.19032272696495056, 'timestamp': '2025-10-01 04:15:46.730451', 'step': 225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:46.783898', 'step': 225, 'epoch': 1} {'type': 'loss', 'content': 0.2777189612388611, 'timestamp': '2025-10-01 04:15:46.785791', 'step': 226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:46.838135', 'step': 226, 'epoch': 1} {'type': 'loss', 'content': 0.24066655337810516, 'timestamp': '2025-10-01 04:15:46.840281', 'step': 227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:46.895123', 'step': 227, 'epoch': 1} {'type': 'loss', 'content': 0.19914409518241882, 'timestamp': '2025-10-01 04:15:46.900889', 'step': 228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:46.953291', 'step': 228, 'epoch': 1} {'type': 'loss', 'content': 0.1907014697790146, 'timestamp': '2025-10-01 04:15:46.956077', 'step': 229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:47.023866', 'step': 229, 'epoch': 1} {'type': 'loss', 'content': 0.2761854827404022, 'timestamp': '2025-10-01 04:15:47.025950', 'step': 230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:47.078640', 'step': 230, 'epoch': 1} {'type': 'loss', 'content': 0.17988409101963043, 'timestamp': '2025-10-01 04:15:47.081053', 'step': 231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:47.134206', 'step': 231, 'epoch': 1} {'type': 'loss', 'content': 0.3011527955532074, 'timestamp': '2025-10-01 04:15:47.139748', 'step': 232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:47.192521', 'step': 232, 'epoch': 1} {'type': 'loss', 'content': 0.23795540630817413, 'timestamp': '2025-10-01 04:15:47.194354', 'step': 233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:47.246946', 'step': 233, 'epoch': 1} {'type': 'loss', 'content': 0.18280839920043945, 'timestamp': '2025-10-01 04:15:47.248758', 'step': 234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:15:47.301663', 'step': 234, 'epoch': 1} {'type': 'loss', 'content': 0.25961625576019287, 'timestamp': '2025-10-01 04:15:47.309492', 'step': 235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:15:47.368245', 'step': 235, 'epoch': 1} {'type': 'loss', 'content': 0.15029491484165192, 'timestamp': '2025-10-01 04:15:47.373958', 'step': 236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:47.427526', 'step': 236, 'epoch': 1} {'type': 'loss', 'content': 0.2544286847114563, 'timestamp': '2025-10-01 04:15:47.430523', 'step': 237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:47.484203', 'step': 237, 'epoch': 1} {'type': 'loss', 'content': 0.2720199227333069, 'timestamp': '2025-10-01 04:15:47.486676', 'step': 238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:47.539885', 'step': 238, 'epoch': 1} {'type': 'loss', 'content': 0.16984093189239502, 'timestamp': '2025-10-01 04:15:47.548781', 'step': 239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:15:47.602625', 'step': 239, 'epoch': 1} {'type': 'loss', 'content': 0.21228794753551483, 'timestamp': '2025-10-01 04:15:47.608202', 'step': 240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:47.661166', 'step': 240, 'epoch': 1} {'type': 'loss', 'content': 0.22570469975471497, 'timestamp': '2025-10-01 04:15:47.663023', 'step': 241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:47.716693', 'step': 241, 'epoch': 1} {'type': 'loss', 'content': 0.20525139570236206, 'timestamp': '2025-10-01 04:15:47.724899', 'step': 242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:47.778177', 'step': 242, 'epoch': 1} {'type': 'loss', 'content': 0.3758407235145569, 'timestamp': '2025-10-01 04:15:47.786083', 'step': 243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:47.849626', 'step': 243, 'epoch': 1} {'type': 'loss', 'content': 0.2223312258720398, 'timestamp': '2025-10-01 04:15:47.855357', 'step': 244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:47.909124', 'step': 244, 'epoch': 1} {'type': 'loss', 'content': 0.2864076495170593, 'timestamp': '2025-10-01 04:15:47.911030', 'step': 245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:47.964460', 'step': 245, 'epoch': 1} {'type': 'loss', 'content': 0.18541055917739868, 'timestamp': '2025-10-01 04:15:47.966526', 'step': 246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:48.024904', 'step': 246, 'epoch': 1} {'type': 'loss', 'content': 0.16626639664173126, 'timestamp': '2025-10-01 04:15:48.026947', 'step': 247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:48.079842', 'step': 247, 'epoch': 1} {'type': 'loss', 'content': 0.1389874517917633, 'timestamp': '2025-10-01 04:15:48.085460', 'step': 248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:15:48.143689', 'step': 248, 'epoch': 1} {'type': 'loss', 'content': 0.2692122161388397, 'timestamp': '2025-10-01 04:15:48.146468', 'step': 249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:48.208681', 'step': 249, 'epoch': 1} {'type': 'loss', 'content': 0.21283137798309326, 'timestamp': '2025-10-01 04:15:48.218471', 'step': 250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:48.272712', 'step': 250, 'epoch': 1} {'type': 'loss', 'content': 0.19764065742492676, 'timestamp': '2025-10-01 04:15:48.274841', 'step': 251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:48.327848', 'step': 251, 'epoch': 1} {'type': 'loss', 'content': 0.20546525716781616, 'timestamp': '2025-10-01 04:15:48.336397', 'step': 252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:48.393789', 'step': 252, 'epoch': 1} {'type': 'loss', 'content': 0.22762556374073029, 'timestamp': '2025-10-01 04:15:48.396835', 'step': 253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:48.450971', 'step': 253, 'epoch': 1} {'type': 'loss', 'content': 0.2013140171766281, 'timestamp': '2025-10-01 04:15:48.453031', 'step': 254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:48.505571', 'step': 254, 'epoch': 1} {'type': 'loss', 'content': 0.1275681108236313, 'timestamp': '2025-10-01 04:15:48.507708', 'step': 255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:48.565599', 'step': 255, 'epoch': 1} {'type': 'loss', 'content': 0.15323524177074432, 'timestamp': '2025-10-01 04:15:48.576922', 'step': 256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:48.633224', 'step': 256, 'epoch': 1} {'type': 'loss', 'content': 0.24936844408512115, 'timestamp': '2025-10-01 04:15:48.636058', 'step': 257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:48.697126', 'step': 257, 'epoch': 1} {'type': 'loss', 'content': 0.1486782729625702, 'timestamp': '2025-10-01 04:15:48.699547', 'step': 258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:48.752694', 'step': 258, 'epoch': 1} {'type': 'loss', 'content': 0.13054277002811432, 'timestamp': '2025-10-01 04:15:48.755267', 'step': 259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:48.808196', 'step': 259, 'epoch': 1} {'type': 'loss', 'content': 0.24801254272460938, 'timestamp': '2025-10-01 04:15:48.813477', 'step': 260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:48.866806', 'step': 260, 'epoch': 1} {'type': 'loss', 'content': 0.16747570037841797, 'timestamp': '2025-10-01 04:15:48.869208', 'step': 261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:48.922835', 'step': 261, 'epoch': 1} {'type': 'loss', 'content': 0.16979137063026428, 'timestamp': '2025-10-01 04:15:48.925080', 'step': 262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:48.977886', 'step': 262, 'epoch': 1} {'type': 'loss', 'content': 0.2156611680984497, 'timestamp': '2025-10-01 04:15:48.981275', 'step': 263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-01 04:15:49.055411', 'step': 263, 'epoch': 1} {'type': 'loss', 'content': 0.24147287011146545, 'timestamp': '2025-10-01 04:15:49.061002', 'step': 264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:49.114013', 'step': 264, 'epoch': 1} {'type': 'loss', 'content': 0.23615220189094543, 'timestamp': '2025-10-01 04:15:49.116205', 'step': 265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:49.169450', 'step': 265, 'epoch': 1} {'type': 'loss', 'content': 0.15290901064872742, 'timestamp': '2025-10-01 04:15:49.171667', 'step': 266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:49.226081', 'step': 266, 'epoch': 1} {'type': 'loss', 'content': 0.13964684307575226, 'timestamp': '2025-10-01 04:15:49.228108', 'step': 267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:49.280902', 'step': 267, 'epoch': 1} {'type': 'loss', 'content': 0.27686482667922974, 'timestamp': '2025-10-01 04:15:49.286520', 'step': 268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:49.339149', 'step': 268, 'epoch': 1} {'type': 'loss', 'content': 0.18314801156520844, 'timestamp': '2025-10-01 04:15:49.342714', 'step': 269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:49.395646', 'step': 269, 'epoch': 1} {'type': 'loss', 'content': 0.1659119427204132, 'timestamp': '2025-10-01 04:15:49.397662', 'step': 270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:15:49.455547', 'step': 270, 'epoch': 1} {'type': 'loss', 'content': 0.24894821643829346, 'timestamp': '2025-10-01 04:15:49.457903', 'step': 271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:49.511088', 'step': 271, 'epoch': 1} {'type': 'loss', 'content': 0.2347007840871811, 'timestamp': '2025-10-01 04:15:49.516765', 'step': 272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:49.569244', 'step': 272, 'epoch': 1} {'type': 'loss', 'content': 0.14319977164268494, 'timestamp': '2025-10-01 04:15:49.572436', 'step': 273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:49.633002', 'step': 273, 'epoch': 1} {'type': 'loss', 'content': 0.20686458051204681, 'timestamp': '2025-10-01 04:15:49.634980', 'step': 274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:49.687608', 'step': 274, 'epoch': 1} {'type': 'loss', 'content': 0.18200016021728516, 'timestamp': '2025-10-01 04:15:49.689564', 'step': 275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:49.743929', 'step': 275, 'epoch': 1} {'type': 'loss', 'content': 0.15734271705150604, 'timestamp': '2025-10-01 04:15:49.749517', 'step': 276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:49.805353', 'step': 276, 'epoch': 1} {'type': 'loss', 'content': 0.14761076867580414, 'timestamp': '2025-10-01 04:15:49.808281', 'step': 277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:49.870256', 'step': 277, 'epoch': 1} {'type': 'loss', 'content': 0.2838197350502014, 'timestamp': '2025-10-01 04:15:49.881070', 'step': 278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:49.949802', 'step': 278, 'epoch': 1} {'type': 'loss', 'content': 0.19871385395526886, 'timestamp': '2025-10-01 04:15:49.951840', 'step': 279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:50.005308', 'step': 279, 'epoch': 1} {'type': 'loss', 'content': 0.2182476818561554, 'timestamp': '2025-10-01 04:15:50.010974', 'step': 280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:50.063697', 'step': 280, 'epoch': 1} {'type': 'loss', 'content': 0.1810089647769928, 'timestamp': '2025-10-01 04:15:50.065956', 'step': 281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:50.124208', 'step': 281, 'epoch': 1} {'type': 'loss', 'content': 0.17271101474761963, 'timestamp': '2025-10-01 04:15:50.139782', 'step': 282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:15:50.207198', 'step': 282, 'epoch': 1} {'type': 'loss', 'content': 0.18929679691791534, 'timestamp': '2025-10-01 04:15:50.216893', 'step': 283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:15:50.270372', 'step': 283, 'epoch': 1} {'type': 'loss', 'content': 0.1854693442583084, 'timestamp': '2025-10-01 04:15:50.283623', 'step': 284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:15:50.341792', 'step': 284, 'epoch': 1} {'type': 'loss', 'content': 0.22579103708267212, 'timestamp': '2025-10-01 04:15:50.343794', 'step': 285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:50.397100', 'step': 285, 'epoch': 1} {'type': 'loss', 'content': 0.2603965997695923, 'timestamp': '2025-10-01 04:15:50.399402', 'step': 286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:50.453351', 'step': 286, 'epoch': 1} {'type': 'loss', 'content': 0.31775301694869995, 'timestamp': '2025-10-01 04:15:50.455718', 'step': 287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:50.509913', 'step': 287, 'epoch': 1} {'type': 'loss', 'content': 0.3352530598640442, 'timestamp': '2025-10-01 04:15:50.515466', 'step': 288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:50.589449', 'step': 288, 'epoch': 1} {'type': 'loss', 'content': 0.2737272381782532, 'timestamp': '2025-10-01 04:15:50.591407', 'step': 289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:15:50.644119', 'step': 289, 'epoch': 1} {'type': 'loss', 'content': 0.17628708481788635, 'timestamp': '2025-10-01 04:15:50.654032', 'step': 290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:15:50.706831', 'step': 290, 'epoch': 1} {'type': 'loss', 'content': 0.1763935089111328, 'timestamp': '2025-10-01 04:15:50.709592', 'step': 291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:50.762471', 'step': 291, 'epoch': 1} {'type': 'loss', 'content': 0.27966296672821045, 'timestamp': '2025-10-01 04:15:50.770579', 'step': 292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:15:50.823609', 'step': 292, 'epoch': 1} {'type': 'loss', 'content': 0.33157023787498474, 'timestamp': '2025-10-01 04:15:50.826249', 'step': 293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:50.883626', 'step': 293, 'epoch': 1} {'type': 'loss', 'content': 0.20926474034786224, 'timestamp': '2025-10-01 04:15:50.891940', 'step': 294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:50.959671', 'step': 294, 'epoch': 1} {'type': 'loss', 'content': 0.1818857640028, 'timestamp': '2025-10-01 04:15:50.966543', 'step': 295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:51.027892', 'step': 295, 'epoch': 1} {'type': 'loss', 'content': 0.1688448190689087, 'timestamp': '2025-10-01 04:15:51.033497', 'step': 296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:15:51.087183', 'step': 296, 'epoch': 1} {'type': 'loss', 'content': 0.1737746298313141, 'timestamp': '2025-10-01 04:15:51.089473', 'step': 297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:51.159025', 'step': 297, 'epoch': 1} {'type': 'loss', 'content': 0.13755875825881958, 'timestamp': '2025-10-01 04:15:51.162538', 'step': 298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:51.218843', 'step': 298, 'epoch': 1} {'type': 'loss', 'content': 0.24782225489616394, 'timestamp': '2025-10-01 04:15:51.229221', 'step': 299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:51.283217', 'step': 299, 'epoch': 1} {'type': 'loss', 'content': 0.21492411196231842, 'timestamp': '2025-10-01 04:15:51.289533', 'step': 300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:51.351127', 'step': 300, 'epoch': 1} {'type': 'loss', 'content': 0.28654035925865173, 'timestamp': '2025-10-01 04:15:51.353659', 'step': 301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:15:51.408728', 'step': 301, 'epoch': 1} {'type': 'loss', 'content': 0.24946944415569305, 'timestamp': '2025-10-01 04:15:51.411356', 'step': 302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:51.465544', 'step': 302, 'epoch': 1} {'type': 'loss', 'content': 0.19849660992622375, 'timestamp': '2025-10-01 04:15:51.467799', 'step': 303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:51.523649', 'step': 303, 'epoch': 1} {'type': 'loss', 'content': 0.20078144967556, 'timestamp': '2025-10-01 04:15:51.541324', 'step': 304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-01 04:15:51.619686', 'step': 304, 'epoch': 1} {'type': 'loss', 'content': 0.22477583587169647, 'timestamp': '2025-10-01 04:15:51.632462', 'step': 305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:51.694325', 'step': 305, 'epoch': 1} {'type': 'loss', 'content': 0.28601574897766113, 'timestamp': '2025-10-01 04:15:51.696601', 'step': 306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:51.777552', 'step': 306, 'epoch': 1} {'type': 'loss', 'content': 0.23184604942798615, 'timestamp': '2025-10-01 04:15:51.788381', 'step': 307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:51.851360', 'step': 307, 'epoch': 1} {'type': 'loss', 'content': 0.1669131964445114, 'timestamp': '2025-10-01 04:15:51.857292', 'step': 308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:51.910503', 'step': 308, 'epoch': 1} {'type': 'loss', 'content': 0.1640084981918335, 'timestamp': '2025-10-01 04:15:51.912704', 'step': 309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:15:51.971970', 'step': 309, 'epoch': 1} {'type': 'loss', 'content': 0.22566182911396027, 'timestamp': '2025-10-01 04:15:51.974222', 'step': 310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:52.027438', 'step': 310, 'epoch': 1} {'type': 'loss', 'content': 0.20943808555603027, 'timestamp': '2025-10-01 04:15:52.029496', 'step': 311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:52.092087', 'step': 311, 'epoch': 1} {'type': 'loss', 'content': 0.18441665172576904, 'timestamp': '2025-10-01 04:15:52.097813', 'step': 312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:52.150799', 'step': 312, 'epoch': 1} {'type': 'loss', 'content': 0.14031945168972015, 'timestamp': '2025-10-01 04:15:52.153422', 'step': 313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:52.208325', 'step': 313, 'epoch': 1} {'type': 'loss', 'content': 0.20755498111248016, 'timestamp': '2025-10-01 04:15:52.210596', 'step': 314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:52.265806', 'step': 314, 'epoch': 1} {'type': 'loss', 'content': 0.25627389550209045, 'timestamp': '2025-10-01 04:15:52.268482', 'step': 315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:52.322588', 'step': 315, 'epoch': 1} {'type': 'loss', 'content': 0.1501908153295517, 'timestamp': '2025-10-01 04:15:52.328420', 'step': 316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:52.384190', 'step': 316, 'epoch': 1} {'type': 'loss', 'content': 0.17557160556316376, 'timestamp': '2025-10-01 04:15:52.393367', 'step': 317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:52.446689', 'step': 317, 'epoch': 1} {'type': 'loss', 'content': 0.13664022088050842, 'timestamp': '2025-10-01 04:15:52.449219', 'step': 318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:52.508079', 'step': 318, 'epoch': 1} {'type': 'loss', 'content': 0.2126491814851761, 'timestamp': '2025-10-01 04:15:52.520617', 'step': 319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:52.574016', 'step': 319, 'epoch': 1} {'type': 'loss', 'content': 0.2540343403816223, 'timestamp': '2025-10-01 04:15:52.579805', 'step': 320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:52.632785', 'step': 320, 'epoch': 1} {'type': 'loss', 'content': 0.21812720596790314, 'timestamp': '2025-10-01 04:15:52.644586', 'step': 321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:52.698707', 'step': 321, 'epoch': 1} {'type': 'loss', 'content': 0.18632522225379944, 'timestamp': '2025-10-01 04:15:52.701046', 'step': 322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:52.754061', 'step': 322, 'epoch': 1} {'type': 'loss', 'content': 0.20899660885334015, 'timestamp': '2025-10-01 04:15:52.756267', 'step': 323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:52.818471', 'step': 323, 'epoch': 1} {'type': 'loss', 'content': 0.18486440181732178, 'timestamp': '2025-10-01 04:15:52.823987', 'step': 324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:15:52.876798', 'step': 324, 'epoch': 1} {'type': 'loss', 'content': 0.16535882651805878, 'timestamp': '2025-10-01 04:15:52.878786', 'step': 325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:52.932706', 'step': 325, 'epoch': 1} {'type': 'loss', 'content': 0.2996715009212494, 'timestamp': '2025-10-01 04:15:52.934805', 'step': 326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:52.989304', 'step': 326, 'epoch': 1} {'type': 'loss', 'content': 0.12152928113937378, 'timestamp': '2025-10-01 04:15:52.991268', 'step': 327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:53.043823', 'step': 327, 'epoch': 1} {'type': 'loss', 'content': 0.22534629702568054, 'timestamp': '2025-10-01 04:15:53.049455', 'step': 328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:53.101904', 'step': 328, 'epoch': 1} {'type': 'loss', 'content': 0.2506791651248932, 'timestamp': '2025-10-01 04:15:53.103903', 'step': 329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:15:53.180508', 'step': 329, 'epoch': 1} {'type': 'loss', 'content': 0.16485975682735443, 'timestamp': '2025-10-01 04:15:53.182799', 'step': 330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:15:53.241964', 'step': 330, 'epoch': 1} {'type': 'loss', 'content': 0.17644625902175903, 'timestamp': '2025-10-01 04:15:53.244189', 'step': 331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:53.308108', 'step': 331, 'epoch': 1} {'type': 'loss', 'content': 0.25329720973968506, 'timestamp': '2025-10-01 04:15:53.313718', 'step': 332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:53.374712', 'step': 332, 'epoch': 1} {'type': 'loss', 'content': 0.1435125172138214, 'timestamp': '2025-10-01 04:15:53.377049', 'step': 333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:53.430855', 'step': 333, 'epoch': 1} {'type': 'loss', 'content': 0.2750914990901947, 'timestamp': '2025-10-01 04:15:53.442103', 'step': 334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:53.514397', 'step': 334, 'epoch': 1} {'type': 'loss', 'content': 0.1258966326713562, 'timestamp': '2025-10-01 04:15:53.517531', 'step': 335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:53.571536', 'step': 335, 'epoch': 1} {'type': 'loss', 'content': 0.20007602870464325, 'timestamp': '2025-10-01 04:15:53.577092', 'step': 336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:53.629399', 'step': 336, 'epoch': 1} {'type': 'loss', 'content': 0.19998082518577576, 'timestamp': '2025-10-01 04:15:53.632332', 'step': 337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:15:53.694424', 'step': 337, 'epoch': 1} {'type': 'loss', 'content': 0.1679486781358719, 'timestamp': '2025-10-01 04:15:53.706033', 'step': 338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:15:53.759860', 'step': 338, 'epoch': 1} {'type': 'loss', 'content': 0.3078869581222534, 'timestamp': '2025-10-01 04:15:53.762012', 'step': 339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:53.820685', 'step': 339, 'epoch': 1} {'type': 'loss', 'content': 0.26965150237083435, 'timestamp': '2025-10-01 04:15:53.826784', 'step': 340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:53.886166', 'step': 340, 'epoch': 1} {'type': 'loss', 'content': 0.24253906309604645, 'timestamp': '2025-10-01 04:15:53.888079', 'step': 341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:53.941021', 'step': 341, 'epoch': 1} {'type': 'loss', 'content': 0.16658155620098114, 'timestamp': '2025-10-01 04:15:53.943079', 'step': 342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:53.995759', 'step': 342, 'epoch': 1} {'type': 'loss', 'content': 0.21920199692249298, 'timestamp': '2025-10-01 04:15:53.997670', 'step': 343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:54.055077', 'step': 343, 'epoch': 1} {'type': 'loss', 'content': 0.1450963020324707, 'timestamp': '2025-10-01 04:15:54.061885', 'step': 344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:15:54.124987', 'step': 344, 'epoch': 1} {'type': 'loss', 'content': 0.2723483145236969, 'timestamp': '2025-10-01 04:15:54.127107', 'step': 345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:54.180242', 'step': 345, 'epoch': 1} {'type': 'loss', 'content': 0.2643912136554718, 'timestamp': '2025-10-01 04:15:54.182198', 'step': 346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:54.234832', 'step': 346, 'epoch': 1} {'type': 'loss', 'content': 0.2652798593044281, 'timestamp': '2025-10-01 04:15:54.236880', 'step': 347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:54.290032', 'step': 347, 'epoch': 1} {'type': 'loss', 'content': 0.22537606954574585, 'timestamp': '2025-10-01 04:15:54.295638', 'step': 348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:54.347928', 'step': 348, 'epoch': 1} {'type': 'loss', 'content': 0.1947948932647705, 'timestamp': '2025-10-01 04:15:54.349979', 'step': 349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:54.405328', 'step': 349, 'epoch': 1} {'type': 'loss', 'content': 0.16954092681407928, 'timestamp': '2025-10-01 04:15:54.407297', 'step': 350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:54.463384', 'step': 350, 'epoch': 1} {'type': 'loss', 'content': 0.16700653731822968, 'timestamp': '2025-10-01 04:15:54.465988', 'step': 351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:54.518549', 'step': 351, 'epoch': 1} {'type': 'loss', 'content': 0.19353333115577698, 'timestamp': '2025-10-01 04:15:54.524197', 'step': 352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:15:54.576956', 'step': 352, 'epoch': 1} {'type': 'loss', 'content': 0.13787265121936798, 'timestamp': '2025-10-01 04:15:54.580566', 'step': 353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:54.634486', 'step': 353, 'epoch': 1} {'type': 'loss', 'content': 0.11605308949947357, 'timestamp': '2025-10-01 04:15:54.636757', 'step': 354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:54.699501', 'step': 354, 'epoch': 1} {'type': 'loss', 'content': 0.19254092872142792, 'timestamp': '2025-10-01 04:15:54.701686', 'step': 355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:54.754178', 'step': 355, 'epoch': 1} {'type': 'loss', 'content': 0.3440154492855072, 'timestamp': '2025-10-01 04:15:54.759811', 'step': 356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:15:54.812302', 'step': 356, 'epoch': 1} {'type': 'loss', 'content': 0.21502497792243958, 'timestamp': '2025-10-01 04:15:54.814449', 'step': 357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:54.866822', 'step': 357, 'epoch': 1} {'type': 'loss', 'content': 0.08681442588567734, 'timestamp': '2025-10-01 04:15:54.869561', 'step': 358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:54.922565', 'step': 358, 'epoch': 1} {'type': 'loss', 'content': 0.2178223729133606, 'timestamp': '2025-10-01 04:15:54.924877', 'step': 359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:54.978419', 'step': 359, 'epoch': 1} {'type': 'loss', 'content': 0.2229776680469513, 'timestamp': '2025-10-01 04:15:54.984352', 'step': 360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:55.037612', 'step': 360, 'epoch': 1} {'type': 'loss', 'content': 0.16338370740413666, 'timestamp': '2025-10-01 04:15:55.039630', 'step': 361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:55.094101', 'step': 361, 'epoch': 1} {'type': 'loss', 'content': 0.20656776428222656, 'timestamp': '2025-10-01 04:15:55.096237', 'step': 362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:55.148605', 'step': 362, 'epoch': 1} {'type': 'loss', 'content': 0.15979385375976562, 'timestamp': '2025-10-01 04:15:55.150457', 'step': 363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:55.203975', 'step': 363, 'epoch': 1} {'type': 'loss', 'content': 0.1840265989303589, 'timestamp': '2025-10-01 04:15:55.209373', 'step': 364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:15:55.261843', 'step': 364, 'epoch': 1} {'type': 'loss', 'content': 0.24517101049423218, 'timestamp': '2025-10-01 04:15:55.264670', 'step': 365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:55.322424', 'step': 365, 'epoch': 1} {'type': 'loss', 'content': 0.19741825759410858, 'timestamp': '2025-10-01 04:15:55.324359', 'step': 366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:15:55.377121', 'step': 366, 'epoch': 1} {'type': 'loss', 'content': 0.3315110206604004, 'timestamp': '2025-10-01 04:15:55.379044', 'step': 367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:55.431917', 'step': 367, 'epoch': 1} {'type': 'loss', 'content': 0.1308298408985138, 'timestamp': '2025-10-01 04:15:55.438181', 'step': 368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:55.490752', 'step': 368, 'epoch': 1} {'type': 'loss', 'content': 0.2316317856311798, 'timestamp': '2025-10-01 04:15:55.492880', 'step': 369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:55.554787', 'step': 369, 'epoch': 1} {'type': 'loss', 'content': 0.12344661355018616, 'timestamp': '2025-10-01 04:15:55.556899', 'step': 370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:55.609837', 'step': 370, 'epoch': 1} {'type': 'loss', 'content': 0.2261449545621872, 'timestamp': '2025-10-01 04:15:55.612101', 'step': 371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:55.664738', 'step': 371, 'epoch': 1} {'type': 'loss', 'content': 0.25561803579330444, 'timestamp': '2025-10-01 04:15:55.671613', 'step': 372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:15:55.724145', 'step': 372, 'epoch': 1} {'type': 'loss', 'content': 0.14445801079273224, 'timestamp': '2025-10-01 04:15:55.727162', 'step': 373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:55.779783', 'step': 373, 'epoch': 1} {'type': 'loss', 'content': 0.2074051797389984, 'timestamp': '2025-10-01 04:15:55.781987', 'step': 374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:55.834812', 'step': 374, 'epoch': 1} {'type': 'loss', 'content': 0.15053772926330566, 'timestamp': '2025-10-01 04:15:55.836706', 'step': 375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:55.889173', 'step': 375, 'epoch': 1} {'type': 'loss', 'content': 0.14843066036701202, 'timestamp': '2025-10-01 04:15:55.895025', 'step': 376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:55.946800', 'step': 376, 'epoch': 1} {'type': 'loss', 'content': 0.20584283769130707, 'timestamp': '2025-10-01 04:15:55.948785', 'step': 377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:56.001441', 'step': 377, 'epoch': 1} {'type': 'loss', 'content': 0.16745567321777344, 'timestamp': '2025-10-01 04:15:56.003786', 'step': 378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:56.056660', 'step': 378, 'epoch': 1} {'type': 'loss', 'content': 0.20199505984783173, 'timestamp': '2025-10-01 04:15:56.059636', 'step': 379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:56.112507', 'step': 379, 'epoch': 1} {'type': 'loss', 'content': 0.29419946670532227, 'timestamp': '2025-10-01 04:15:56.117980', 'step': 380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:56.170411', 'step': 380, 'epoch': 1} {'type': 'loss', 'content': 0.2471538782119751, 'timestamp': '2025-10-01 04:15:56.173559', 'step': 381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:56.226703', 'step': 381, 'epoch': 1} {'type': 'loss', 'content': 0.328779011964798, 'timestamp': '2025-10-01 04:15:56.228742', 'step': 382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:56.282489', 'step': 382, 'epoch': 1} {'type': 'loss', 'content': 0.16251985728740692, 'timestamp': '2025-10-01 04:15:56.284572', 'step': 383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:56.337358', 'step': 383, 'epoch': 1} {'type': 'loss', 'content': 0.22321844100952148, 'timestamp': '2025-10-01 04:15:56.343439', 'step': 384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:56.396872', 'step': 384, 'epoch': 1} {'type': 'loss', 'content': 0.19819270074367523, 'timestamp': '2025-10-01 04:15:56.398716', 'step': 385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:56.451361', 'step': 385, 'epoch': 1} {'type': 'loss', 'content': 0.1741032749414444, 'timestamp': '2025-10-01 04:15:56.454378', 'step': 386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:56.509167', 'step': 386, 'epoch': 1} {'type': 'loss', 'content': 0.13496945798397064, 'timestamp': '2025-10-01 04:15:56.516838', 'step': 387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:15:56.577049', 'step': 387, 'epoch': 1} {'type': 'loss', 'content': 0.2574458122253418, 'timestamp': '2025-10-01 04:15:56.582709', 'step': 388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:56.635363', 'step': 388, 'epoch': 1} {'type': 'loss', 'content': 0.17921343445777893, 'timestamp': '2025-10-01 04:15:56.637312', 'step': 389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:15:56.699411', 'step': 389, 'epoch': 1} {'type': 'loss', 'content': 0.21121658384799957, 'timestamp': '2025-10-01 04:15:56.701272', 'step': 390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:56.754352', 'step': 390, 'epoch': 1} {'type': 'loss', 'content': 0.24056360125541687, 'timestamp': '2025-10-01 04:15:56.756195', 'step': 391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:15:56.817419', 'step': 391, 'epoch': 1} {'type': 'loss', 'content': 0.26381924748420715, 'timestamp': '2025-10-01 04:15:56.822758', 'step': 392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:56.874661', 'step': 392, 'epoch': 1} {'type': 'loss', 'content': 0.17931845784187317, 'timestamp': '2025-10-01 04:15:56.876472', 'step': 393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:56.938723', 'step': 393, 'epoch': 1} {'type': 'loss', 'content': 0.13769620656967163, 'timestamp': '2025-10-01 04:15:56.940708', 'step': 394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:15:56.993418', 'step': 394, 'epoch': 1} {'type': 'loss', 'content': 0.30162692070007324, 'timestamp': '2025-10-01 04:15:56.995535', 'step': 395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:57.048449', 'step': 395, 'epoch': 1} {'type': 'loss', 'content': 0.12128307670354843, 'timestamp': '2025-10-01 04:15:57.054022', 'step': 396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:57.106852', 'step': 396, 'epoch': 1} {'type': 'loss', 'content': 0.1340426206588745, 'timestamp': '2025-10-01 04:15:57.109156', 'step': 397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:57.161820', 'step': 397, 'epoch': 1} {'type': 'loss', 'content': 0.2685408890247345, 'timestamp': '2025-10-01 04:15:57.163899', 'step': 398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:57.217780', 'step': 398, 'epoch': 1} {'type': 'loss', 'content': 0.2243635505437851, 'timestamp': '2025-10-01 04:15:57.219545', 'step': 399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:57.272156', 'step': 399, 'epoch': 1} {'type': 'loss', 'content': 0.14377252757549286, 'timestamp': '2025-10-01 04:15:57.277736', 'step': 400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:15:57.340873', 'step': 400, 'epoch': 1} {'type': 'loss', 'content': 0.26392051577568054, 'timestamp': '2025-10-01 04:15:57.342728', 'step': 401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:57.395150', 'step': 401, 'epoch': 1} {'type': 'loss', 'content': 0.2028050273656845, 'timestamp': '2025-10-01 04:15:57.397472', 'step': 402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:15:57.450624', 'step': 402, 'epoch': 1} {'type': 'loss', 'content': 0.1773066371679306, 'timestamp': '2025-10-01 04:15:57.452655', 'step': 403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:57.506666', 'step': 403, 'epoch': 1} {'type': 'loss', 'content': 0.14693830907344818, 'timestamp': '2025-10-01 04:15:57.512236', 'step': 404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:57.564438', 'step': 404, 'epoch': 1} {'type': 'loss', 'content': 0.20835310220718384, 'timestamp': '2025-10-01 04:15:57.566266', 'step': 405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:15:57.621029', 'step': 405, 'epoch': 1} {'type': 'loss', 'content': 0.21178710460662842, 'timestamp': '2025-10-01 04:15:57.624674', 'step': 406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:57.679422', 'step': 406, 'epoch': 1} {'type': 'loss', 'content': 0.15348376333713531, 'timestamp': '2025-10-01 04:15:57.681484', 'step': 407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:15:57.733996', 'step': 407, 'epoch': 1} {'type': 'loss', 'content': 0.19206403195858002, 'timestamp': '2025-10-01 04:15:57.739743', 'step': 408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:15:57.791609', 'step': 408, 'epoch': 1} {'type': 'loss', 'content': 0.13938286900520325, 'timestamp': '2025-10-01 04:15:57.801171', 'step': 409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:57.854170', 'step': 409, 'epoch': 1} {'type': 'loss', 'content': 0.23369254171848297, 'timestamp': '2025-10-01 04:15:57.856159', 'step': 410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:57.909004', 'step': 410, 'epoch': 1} {'type': 'loss', 'content': 0.2402820736169815, 'timestamp': '2025-10-01 04:15:57.910814', 'step': 411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:15:57.963366', 'step': 411, 'epoch': 1} {'type': 'loss', 'content': 0.1984441876411438, 'timestamp': '2025-10-01 04:15:57.968781', 'step': 412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:58.032522', 'step': 412, 'epoch': 1} {'type': 'loss', 'content': 0.20241981744766235, 'timestamp': '2025-10-01 04:15:58.034339', 'step': 413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:58.087273', 'step': 413, 'epoch': 1} {'type': 'loss', 'content': 0.2230377495288849, 'timestamp': '2025-10-01 04:15:58.089364', 'step': 414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:58.142168', 'step': 414, 'epoch': 1} {'type': 'loss', 'content': 0.1760142594575882, 'timestamp': '2025-10-01 04:15:58.144467', 'step': 415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:58.198187', 'step': 415, 'epoch': 1} {'type': 'loss', 'content': 0.17112034559249878, 'timestamp': '2025-10-01 04:15:58.203762', 'step': 416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:58.257579', 'step': 416, 'epoch': 1} {'type': 'loss', 'content': 0.12456905096769333, 'timestamp': '2025-10-01 04:15:58.260028', 'step': 417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:58.313400', 'step': 417, 'epoch': 1} {'type': 'loss', 'content': 0.23802484571933746, 'timestamp': '2025-10-01 04:15:58.315521', 'step': 418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:15:58.368840', 'step': 418, 'epoch': 1} {'type': 'loss', 'content': 0.12205112725496292, 'timestamp': '2025-10-01 04:15:58.370713', 'step': 419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:58.423486', 'step': 419, 'epoch': 1} {'type': 'loss', 'content': 0.1685473620891571, 'timestamp': '2025-10-01 04:15:58.429122', 'step': 420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:58.481339', 'step': 420, 'epoch': 1} {'type': 'loss', 'content': 0.1695287972688675, 'timestamp': '2025-10-01 04:15:58.484093', 'step': 421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:58.537333', 'step': 421, 'epoch': 1} {'type': 'loss', 'content': 0.22338883578777313, 'timestamp': '2025-10-01 04:15:58.539400', 'step': 422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:58.601725', 'step': 422, 'epoch': 1} {'type': 'loss', 'content': 0.2631002366542816, 'timestamp': '2025-10-01 04:15:58.603930', 'step': 423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:58.656600', 'step': 423, 'epoch': 1} {'type': 'loss', 'content': 0.16325418651103973, 'timestamp': '2025-10-01 04:15:58.661992', 'step': 424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:58.714730', 'step': 424, 'epoch': 1} {'type': 'loss', 'content': 0.1517864614725113, 'timestamp': '2025-10-01 04:15:58.716895', 'step': 425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:58.769503', 'step': 425, 'epoch': 1} {'type': 'loss', 'content': 0.21442298591136932, 'timestamp': '2025-10-01 04:15:58.771762', 'step': 426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:58.824533', 'step': 426, 'epoch': 1} {'type': 'loss', 'content': 0.3345147967338562, 'timestamp': '2025-10-01 04:15:58.828220', 'step': 427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:15:58.880507', 'step': 427, 'epoch': 1} {'type': 'loss', 'content': 0.2442919760942459, 'timestamp': '2025-10-01 04:15:58.886005', 'step': 428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:58.938255', 'step': 428, 'epoch': 1} {'type': 'loss', 'content': 0.17442047595977783, 'timestamp': '2025-10-01 04:15:58.940544', 'step': 429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:58.992814', 'step': 429, 'epoch': 1} {'type': 'loss', 'content': 0.13198211789131165, 'timestamp': '2025-10-01 04:15:58.994686', 'step': 430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:15:59.048125', 'step': 430, 'epoch': 1} {'type': 'loss', 'content': 0.2325781136751175, 'timestamp': '2025-10-01 04:15:59.050130', 'step': 431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:59.103145', 'step': 431, 'epoch': 1} {'type': 'loss', 'content': 0.15787635743618011, 'timestamp': '2025-10-01 04:15:59.108688', 'step': 432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:59.161009', 'step': 432, 'epoch': 1} {'type': 'loss', 'content': 0.14848962426185608, 'timestamp': '2025-10-01 04:15:59.163093', 'step': 433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:59.216128', 'step': 433, 'epoch': 1} {'type': 'loss', 'content': 0.16196072101593018, 'timestamp': '2025-10-01 04:15:59.217950', 'step': 434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:59.270704', 'step': 434, 'epoch': 1} {'type': 'loss', 'content': 0.29863807559013367, 'timestamp': '2025-10-01 04:15:59.272689', 'step': 435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:59.325373', 'step': 435, 'epoch': 1} {'type': 'loss', 'content': 0.1132538691163063, 'timestamp': '2025-10-01 04:15:59.330921', 'step': 436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:59.385357', 'step': 436, 'epoch': 1} {'type': 'loss', 'content': 0.2355467826128006, 'timestamp': '2025-10-01 04:15:59.387708', 'step': 437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:15:59.457791', 'step': 437, 'epoch': 1} {'type': 'loss', 'content': 0.23520076274871826, 'timestamp': '2025-10-01 04:15:59.459874', 'step': 438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:15:59.512579', 'step': 438, 'epoch': 1} {'type': 'loss', 'content': 0.14761076867580414, 'timestamp': '2025-10-01 04:15:59.514799', 'step': 439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:59.584841', 'step': 439, 'epoch': 1} {'type': 'loss', 'content': 0.23425205051898956, 'timestamp': '2025-10-01 04:15:59.590365', 'step': 440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:15:59.642422', 'step': 440, 'epoch': 1} {'type': 'loss', 'content': 0.17334216833114624, 'timestamp': '2025-10-01 04:15:59.644407', 'step': 441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:59.697620', 'step': 441, 'epoch': 1} {'type': 'loss', 'content': 0.16448627412319183, 'timestamp': '2025-10-01 04:15:59.699518', 'step': 442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:59.752395', 'step': 442, 'epoch': 1} {'type': 'loss', 'content': 0.18019695580005646, 'timestamp': '2025-10-01 04:15:59.754435', 'step': 443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:59.806710', 'step': 443, 'epoch': 1} {'type': 'loss', 'content': 0.2116698920726776, 'timestamp': '2025-10-01 04:15:59.812256', 'step': 444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:59.864212', 'step': 444, 'epoch': 1} {'type': 'loss', 'content': 0.2638951539993286, 'timestamp': '2025-10-01 04:15:59.866464', 'step': 445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:15:59.919204', 'step': 445, 'epoch': 1} {'type': 'loss', 'content': 0.22286133468151093, 'timestamp': '2025-10-01 04:15:59.921361', 'step': 446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:15:59.975824', 'step': 446, 'epoch': 1} {'type': 'loss', 'content': 0.24555465579032898, 'timestamp': '2025-10-01 04:15:59.978140', 'step': 447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:16:00.033006', 'step': 447, 'epoch': 1} {'type': 'loss', 'content': 0.18740275502204895, 'timestamp': '2025-10-01 04:16:00.038897', 'step': 448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:00.092522', 'step': 448, 'epoch': 1} {'type': 'loss', 'content': 0.2616521418094635, 'timestamp': '2025-10-01 04:16:00.094649', 'step': 449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:00.148429', 'step': 449, 'epoch': 1} {'type': 'loss', 'content': 0.17200832068920135, 'timestamp': '2025-10-01 04:16:00.151038', 'step': 450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:00.204824', 'step': 450, 'epoch': 1} {'type': 'loss', 'content': 0.16907398402690887, 'timestamp': '2025-10-01 04:16:00.207126', 'step': 451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:00.260856', 'step': 451, 'epoch': 1} {'type': 'loss', 'content': 0.2290564775466919, 'timestamp': '2025-10-01 04:16:00.266830', 'step': 452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:00.320924', 'step': 452, 'epoch': 1} {'type': 'loss', 'content': 0.23367516696453094, 'timestamp': '2025-10-01 04:16:00.323283', 'step': 453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:16:00.377070', 'step': 453, 'epoch': 1} {'type': 'loss', 'content': 0.1972399204969406, 'timestamp': '2025-10-01 04:16:00.392148', 'step': 454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:00.457932', 'step': 454, 'epoch': 1} {'type': 'loss', 'content': 0.23387384414672852, 'timestamp': '2025-10-01 04:16:00.459997', 'step': 455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:00.513261', 'step': 455, 'epoch': 1} {'type': 'loss', 'content': 0.246153324842453, 'timestamp': '2025-10-01 04:16:00.519895', 'step': 456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:16:00.572374', 'step': 456, 'epoch': 1} {'type': 'loss', 'content': 0.24622511863708496, 'timestamp': '2025-10-01 04:16:00.574522', 'step': 457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:00.629896', 'step': 457, 'epoch': 1} {'type': 'loss', 'content': 0.1906365156173706, 'timestamp': '2025-10-01 04:16:00.632083', 'step': 458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:00.685683', 'step': 458, 'epoch': 1} {'type': 'loss', 'content': 0.2471376657485962, 'timestamp': '2025-10-01 04:16:00.696112', 'step': 459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:00.754526', 'step': 459, 'epoch': 1} {'type': 'loss', 'content': 0.23689694702625275, 'timestamp': '2025-10-01 04:16:00.760491', 'step': 460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:00.813065', 'step': 460, 'epoch': 1} {'type': 'loss', 'content': 0.2255501002073288, 'timestamp': '2025-10-01 04:16:00.815520', 'step': 461, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:00.880886', 'step': 461, 'epoch': 1} {'type': 'loss', 'content': 0.245527982711792, 'timestamp': '2025-10-01 04:16:00.883051', 'step': 462, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:00.936494', 'step': 462, 'epoch': 1} {'type': 'loss', 'content': 0.15164858102798462, 'timestamp': '2025-10-01 04:16:00.938558', 'step': 463, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:00.992158', 'step': 463, 'epoch': 1} {'type': 'loss', 'content': 0.2575303018093109, 'timestamp': '2025-10-01 04:16:00.997972', 'step': 464, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:01.050749', 'step': 464, 'epoch': 1} {'type': 'loss', 'content': 0.15863218903541565, 'timestamp': '2025-10-01 04:16:01.053119', 'step': 465, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:01.106228', 'step': 465, 'epoch': 1} {'type': 'loss', 'content': 0.19853800535202026, 'timestamp': '2025-10-01 04:16:01.108564', 'step': 466, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-01 04:16:13.890412', 'step': 466, 'epoch': 1} {'type': 'pplx', 'content': 7440.8482680613315, 'timestamp': '2025-10-01 04:16:13.893379', 'step': 466, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:13.947608', 'step': 466, 'epoch': 1} {'type': 'loss', 'content': 0.222210094332695, 'timestamp': '2025-10-01 04:16:13.949799', 'step': 467, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:14.002998', 'step': 467, 'epoch': 1} {'type': 'loss', 'content': 0.20069348812103271, 'timestamp': '2025-10-01 04:16:14.008831', 'step': 468, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:14.061574', 'step': 468, 'epoch': 1} {'type': 'loss', 'content': 0.20838096737861633, 'timestamp': '2025-10-01 04:16:14.063614', 'step': 469, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:14.116252', 'step': 469, 'epoch': 1} {'type': 'loss', 'content': 0.1091289222240448, 'timestamp': '2025-10-01 04:16:14.118778', 'step': 470, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:14.171869', 'step': 470, 'epoch': 1} {'type': 'loss', 'content': 0.2338082194328308, 'timestamp': '2025-10-01 04:16:14.174087', 'step': 471, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:14.226809', 'step': 471, 'epoch': 1} {'type': 'loss', 'content': 0.23829548060894012, 'timestamp': '2025-10-01 04:16:14.232808', 'step': 472, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:14.284842', 'step': 472, 'epoch': 1} {'type': 'loss', 'content': 0.19163072109222412, 'timestamp': '2025-10-01 04:16:14.287097', 'step': 473, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:14.340124', 'step': 473, 'epoch': 1} {'type': 'loss', 'content': 0.10884109139442444, 'timestamp': '2025-10-01 04:16:14.342207', 'step': 474, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:14.395256', 'step': 474, 'epoch': 1} {'type': 'loss', 'content': 0.2938355505466461, 'timestamp': '2025-10-01 04:16:14.397326', 'step': 475, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:14.450676', 'step': 475, 'epoch': 1} {'type': 'loss', 'content': 0.1519690603017807, 'timestamp': '2025-10-01 04:16:14.457594', 'step': 476, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:14.509632', 'step': 476, 'epoch': 1} {'type': 'loss', 'content': 0.15261578559875488, 'timestamp': '2025-10-01 04:16:14.511780', 'step': 477, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:14.564658', 'step': 477, 'epoch': 1} {'type': 'loss', 'content': 0.12835726141929626, 'timestamp': '2025-10-01 04:16:14.566660', 'step': 478, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:16:14.619850', 'step': 478, 'epoch': 1} {'type': 'loss', 'content': 0.15359629690647125, 'timestamp': '2025-10-01 04:16:14.621790', 'step': 479, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:14.674794', 'step': 479, 'epoch': 1} {'type': 'loss', 'content': 0.29354244470596313, 'timestamp': '2025-10-01 04:16:14.680471', 'step': 480, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:16:14.733241', 'step': 480, 'epoch': 1} {'type': 'loss', 'content': 0.22470013797283173, 'timestamp': '2025-10-01 04:16:14.735037', 'step': 481, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:14.787390', 'step': 481, 'epoch': 1} {'type': 'loss', 'content': 0.1856301873922348, 'timestamp': '2025-10-01 04:16:14.789405', 'step': 482, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:16:14.842490', 'step': 482, 'epoch': 1} {'type': 'loss', 'content': 0.16355787217617035, 'timestamp': '2025-10-01 04:16:14.844370', 'step': 483, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:14.898148', 'step': 483, 'epoch': 1} {'type': 'loss', 'content': 0.26427412033081055, 'timestamp': '2025-10-01 04:16:14.903831', 'step': 484, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:14.956582', 'step': 484, 'epoch': 1} {'type': 'loss', 'content': 0.18015094101428986, 'timestamp': '2025-10-01 04:16:14.958600', 'step': 485, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:15.011133', 'step': 485, 'epoch': 1} {'type': 'loss', 'content': 0.17002315819263458, 'timestamp': '2025-10-01 04:16:15.013030', 'step': 486, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:15.065900', 'step': 486, 'epoch': 1} {'type': 'loss', 'content': 0.1105101928114891, 'timestamp': '2025-10-01 04:16:15.067788', 'step': 487, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:15.120354', 'step': 487, 'epoch': 1} {'type': 'loss', 'content': 0.27034586668014526, 'timestamp': '2025-10-01 04:16:15.125496', 'step': 488, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:15.178087', 'step': 488, 'epoch': 1} {'type': 'loss', 'content': 0.28095075488090515, 'timestamp': '2025-10-01 04:16:15.179935', 'step': 489, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:15.232899', 'step': 489, 'epoch': 1} {'type': 'loss', 'content': 0.16646039485931396, 'timestamp': '2025-10-01 04:16:15.234470', 'step': 490, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:15.287818', 'step': 490, 'epoch': 1} {'type': 'loss', 'content': 0.1989971101284027, 'timestamp': '2025-10-01 04:16:15.289766', 'step': 491, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:15.342565', 'step': 491, 'epoch': 1} {'type': 'loss', 'content': 0.21896257996559143, 'timestamp': '2025-10-01 04:16:15.348441', 'step': 492, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:15.400151', 'step': 492, 'epoch': 1} {'type': 'loss', 'content': 0.1296209841966629, 'timestamp': '2025-10-01 04:16:15.402232', 'step': 493, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:15.455007', 'step': 493, 'epoch': 1} {'type': 'loss', 'content': 0.22670380771160126, 'timestamp': '2025-10-01 04:16:15.456982', 'step': 494, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:15.515893', 'step': 494, 'epoch': 1} {'type': 'loss', 'content': 0.22615700960159302, 'timestamp': '2025-10-01 04:16:15.517653', 'step': 495, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:16:15.570502', 'step': 495, 'epoch': 1} {'type': 'loss', 'content': 0.23667702078819275, 'timestamp': '2025-10-01 04:16:15.576572', 'step': 496, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:16:15.630169', 'step': 496, 'epoch': 1} {'type': 'loss', 'content': 0.2748464345932007, 'timestamp': '2025-10-01 04:16:15.632538', 'step': 497, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:15.685708', 'step': 497, 'epoch': 1} {'type': 'loss', 'content': 0.2105851173400879, 'timestamp': '2025-10-01 04:16:15.687772', 'step': 498, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:15.740885', 'step': 498, 'epoch': 1} {'type': 'loss', 'content': 0.23049211502075195, 'timestamp': '2025-10-01 04:16:15.743063', 'step': 499, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:15.797256', 'step': 499, 'epoch': 1} {'type': 'loss', 'content': 0.171547532081604, 'timestamp': '2025-10-01 04:16:15.803301', 'step': 500, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 500', 'timestamp': '2025-10-01 04:16:16.174231', 'step': 500, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:16.232872', 'step': 500, 'epoch': 1} {'type': 'loss', 'content': 0.11025220900774002, 'timestamp': '2025-10-01 04:16:16.234881', 'step': 501, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:16.289288', 'step': 501, 'epoch': 1} {'type': 'loss', 'content': 0.2284827083349228, 'timestamp': '2025-10-01 04:16:16.291290', 'step': 502, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:16.345330', 'step': 502, 'epoch': 1} {'type': 'loss', 'content': 0.15850616991519928, 'timestamp': '2025-10-01 04:16:16.347450', 'step': 503, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:16.400576', 'step': 503, 'epoch': 1} {'type': 'loss', 'content': 0.15181627869606018, 'timestamp': '2025-10-01 04:16:16.406246', 'step': 504, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:16.458906', 'step': 504, 'epoch': 1} {'type': 'loss', 'content': 0.1658807098865509, 'timestamp': '2025-10-01 04:16:16.460928', 'step': 505, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:16.513890', 'step': 505, 'epoch': 1} {'type': 'loss', 'content': 0.20613326132297516, 'timestamp': '2025-10-01 04:16:16.515918', 'step': 506, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:16.570015', 'step': 506, 'epoch': 1} {'type': 'loss', 'content': 0.27548250555992126, 'timestamp': '2025-10-01 04:16:16.578333', 'step': 507, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:16.632732', 'step': 507, 'epoch': 1} {'type': 'loss', 'content': 0.1382548063993454, 'timestamp': '2025-10-01 04:16:16.639616', 'step': 508, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:16.692986', 'step': 508, 'epoch': 1} {'type': 'loss', 'content': 0.18358685076236725, 'timestamp': '2025-10-01 04:16:16.695164', 'step': 509, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:16.748806', 'step': 509, 'epoch': 1} {'type': 'loss', 'content': 0.14861740171909332, 'timestamp': '2025-10-01 04:16:16.750724', 'step': 510, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:16:16.803632', 'step': 510, 'epoch': 1} {'type': 'loss', 'content': 0.13300223648548126, 'timestamp': '2025-10-01 04:16:16.805483', 'step': 511, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:16.859252', 'step': 511, 'epoch': 1} {'type': 'loss', 'content': 0.14915244281291962, 'timestamp': '2025-10-01 04:16:16.866443', 'step': 512, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:16.919423', 'step': 512, 'epoch': 1} {'type': 'loss', 'content': 0.2792799770832062, 'timestamp': '2025-10-01 04:16:16.921530', 'step': 513, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:16.974533', 'step': 513, 'epoch': 1} {'type': 'loss', 'content': 0.18575294315814972, 'timestamp': '2025-10-01 04:16:16.977005', 'step': 514, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:17.029732', 'step': 514, 'epoch': 1} {'type': 'loss', 'content': 0.2151346504688263, 'timestamp': '2025-10-01 04:16:17.031872', 'step': 515, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:17.084849', 'step': 515, 'epoch': 1} {'type': 'loss', 'content': 0.2199695110321045, 'timestamp': '2025-10-01 04:16:17.090825', 'step': 516, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:17.143812', 'step': 516, 'epoch': 1} {'type': 'loss', 'content': 0.24017687141895294, 'timestamp': '2025-10-01 04:16:17.145459', 'step': 517, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:17.197921', 'step': 517, 'epoch': 1} {'type': 'loss', 'content': 0.123841293156147, 'timestamp': '2025-10-01 04:16:17.199767', 'step': 518, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:17.252781', 'step': 518, 'epoch': 1} {'type': 'loss', 'content': 0.21397626399993896, 'timestamp': '2025-10-01 04:16:17.254422', 'step': 519, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:17.307107', 'step': 519, 'epoch': 1} {'type': 'loss', 'content': 0.17019778490066528, 'timestamp': '2025-10-01 04:16:17.312985', 'step': 520, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:16:17.365386', 'step': 520, 'epoch': 1} {'type': 'loss', 'content': 0.1546744704246521, 'timestamp': '2025-10-01 04:16:17.377503', 'step': 521, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:17.430537', 'step': 521, 'epoch': 1} {'type': 'loss', 'content': 0.2998850643634796, 'timestamp': '2025-10-01 04:16:17.432593', 'step': 522, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:17.485425', 'step': 522, 'epoch': 1} {'type': 'loss', 'content': 0.20776431262493134, 'timestamp': '2025-10-01 04:16:17.487466', 'step': 523, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:17.540146', 'step': 523, 'epoch': 1} {'type': 'loss', 'content': 0.19947773218154907, 'timestamp': '2025-10-01 04:16:17.545846', 'step': 524, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:17.599179', 'step': 524, 'epoch': 1} {'type': 'loss', 'content': 0.11457496881484985, 'timestamp': '2025-10-01 04:16:17.600854', 'step': 525, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:17.653558', 'step': 525, 'epoch': 1} {'type': 'loss', 'content': 0.2494397908449173, 'timestamp': '2025-10-01 04:16:17.655252', 'step': 526, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:17.709418', 'step': 526, 'epoch': 1} {'type': 'loss', 'content': 0.18201279640197754, 'timestamp': '2025-10-01 04:16:17.711533', 'step': 527, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:16:17.764743', 'step': 527, 'epoch': 1} {'type': 'loss', 'content': 0.17409801483154297, 'timestamp': '2025-10-01 04:16:17.772569', 'step': 528, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:17.825829', 'step': 528, 'epoch': 1} {'type': 'loss', 'content': 0.15510721504688263, 'timestamp': '2025-10-01 04:16:17.827759', 'step': 529, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:17.881212', 'step': 529, 'epoch': 1} {'type': 'loss', 'content': 0.20983727276325226, 'timestamp': '2025-10-01 04:16:17.883350', 'step': 530, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:17.936700', 'step': 530, 'epoch': 1} {'type': 'loss', 'content': 0.15496738255023956, 'timestamp': '2025-10-01 04:16:17.938798', 'step': 531, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:17.991897', 'step': 531, 'epoch': 1} {'type': 'loss', 'content': 0.16869546473026276, 'timestamp': '2025-10-01 04:16:17.997302', 'step': 532, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:16:18.052912', 'step': 532, 'epoch': 1} {'type': 'loss', 'content': 0.14798781275749207, 'timestamp': '2025-10-01 04:16:18.054554', 'step': 533, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:16:18.107820', 'step': 533, 'epoch': 1} {'type': 'loss', 'content': 0.20771759748458862, 'timestamp': '2025-10-01 04:16:18.109520', 'step': 534, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:18.162819', 'step': 534, 'epoch': 1} {'type': 'loss', 'content': 0.3887050747871399, 'timestamp': '2025-10-01 04:16:18.164850', 'step': 535, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:18.219506', 'step': 535, 'epoch': 1} {'type': 'loss', 'content': 0.18330132961273193, 'timestamp': '2025-10-01 04:16:18.225351', 'step': 536, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:18.277494', 'step': 536, 'epoch': 1} {'type': 'loss', 'content': 0.1329588145017624, 'timestamp': '2025-10-01 04:16:18.280198', 'step': 537, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:18.333975', 'step': 537, 'epoch': 1} {'type': 'loss', 'content': 0.09884515404701233, 'timestamp': '2025-10-01 04:16:18.339414', 'step': 538, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:18.393165', 'step': 538, 'epoch': 1} {'type': 'loss', 'content': 0.2486598789691925, 'timestamp': '2025-10-01 04:16:18.395127', 'step': 539, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:18.449275', 'step': 539, 'epoch': 1} {'type': 'loss', 'content': 0.1904747486114502, 'timestamp': '2025-10-01 04:16:18.455097', 'step': 540, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:18.508226', 'step': 540, 'epoch': 1} {'type': 'loss', 'content': 0.20040005445480347, 'timestamp': '2025-10-01 04:16:18.510580', 'step': 541, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:18.564386', 'step': 541, 'epoch': 1} {'type': 'loss', 'content': 0.19085952639579773, 'timestamp': '2025-10-01 04:16:18.567731', 'step': 542, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:18.623030', 'step': 542, 'epoch': 1} {'type': 'loss', 'content': 0.1322132647037506, 'timestamp': '2025-10-01 04:16:18.625621', 'step': 543, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:18.680307', 'step': 543, 'epoch': 1} {'type': 'loss', 'content': 0.2206982970237732, 'timestamp': '2025-10-01 04:16:18.686556', 'step': 544, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:18.740466', 'step': 544, 'epoch': 1} {'type': 'loss', 'content': 0.18400996923446655, 'timestamp': '2025-10-01 04:16:18.743055', 'step': 545, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:18.797203', 'step': 545, 'epoch': 1} {'type': 'loss', 'content': 0.22349485754966736, 'timestamp': '2025-10-01 04:16:18.798975', 'step': 546, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:18.852549', 'step': 546, 'epoch': 1} {'type': 'loss', 'content': 0.19317007064819336, 'timestamp': '2025-10-01 04:16:18.854192', 'step': 547, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:18.911302', 'step': 547, 'epoch': 1} {'type': 'loss', 'content': 0.2735867202281952, 'timestamp': '2025-10-01 04:16:18.917302', 'step': 548, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:18.969610', 'step': 548, 'epoch': 1} {'type': 'loss', 'content': 0.29230615496635437, 'timestamp': '2025-10-01 04:16:18.979846', 'step': 549, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:19.033105', 'step': 549, 'epoch': 1} {'type': 'loss', 'content': 0.10597309470176697, 'timestamp': '2025-10-01 04:16:19.035433', 'step': 550, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:19.089044', 'step': 550, 'epoch': 1} {'type': 'loss', 'content': 0.18999607861042023, 'timestamp': '2025-10-01 04:16:19.091133', 'step': 551, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:19.144533', 'step': 551, 'epoch': 1} {'type': 'loss', 'content': 0.23394399881362915, 'timestamp': '2025-10-01 04:16:19.152430', 'step': 552, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:19.205725', 'step': 552, 'epoch': 1} {'type': 'loss', 'content': 0.18349780142307281, 'timestamp': '2025-10-01 04:16:19.208255', 'step': 553, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:19.261748', 'step': 553, 'epoch': 1} {'type': 'loss', 'content': 0.2201651930809021, 'timestamp': '2025-10-01 04:16:19.263940', 'step': 554, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:19.317841', 'step': 554, 'epoch': 1} {'type': 'loss', 'content': 0.17900478839874268, 'timestamp': '2025-10-01 04:16:19.320020', 'step': 555, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:19.373284', 'step': 555, 'epoch': 1} {'type': 'loss', 'content': 0.13984371721744537, 'timestamp': '2025-10-01 04:16:19.379031', 'step': 556, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:19.432714', 'step': 556, 'epoch': 1} {'type': 'loss', 'content': 0.13490217924118042, 'timestamp': '2025-10-01 04:16:19.443194', 'step': 557, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:19.498566', 'step': 557, 'epoch': 1} {'type': 'loss', 'content': 0.11032652109861374, 'timestamp': '2025-10-01 04:16:19.501270', 'step': 558, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:19.554474', 'step': 558, 'epoch': 1} {'type': 'loss', 'content': 0.1630955934524536, 'timestamp': '2025-10-01 04:16:19.556453', 'step': 559, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:19.610829', 'step': 559, 'epoch': 1} {'type': 'loss', 'content': 0.2650587558746338, 'timestamp': '2025-10-01 04:16:19.616598', 'step': 560, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:19.669165', 'step': 560, 'epoch': 1} {'type': 'loss', 'content': 0.17360903322696686, 'timestamp': '2025-10-01 04:16:19.685259', 'step': 561, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:19.739480', 'step': 561, 'epoch': 1} {'type': 'loss', 'content': 0.12389769405126572, 'timestamp': '2025-10-01 04:16:19.741487', 'step': 562, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:19.793937', 'step': 562, 'epoch': 1} {'type': 'loss', 'content': 0.18696662783622742, 'timestamp': '2025-10-01 04:16:19.796184', 'step': 563, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:19.849409', 'step': 563, 'epoch': 1} {'type': 'loss', 'content': 0.22466665506362915, 'timestamp': '2025-10-01 04:16:19.865111', 'step': 564, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:16:19.919568', 'step': 564, 'epoch': 1} {'type': 'loss', 'content': 0.2934197187423706, 'timestamp': '2025-10-01 04:16:19.921630', 'step': 565, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:19.976572', 'step': 565, 'epoch': 1} {'type': 'loss', 'content': 0.23624597489833832, 'timestamp': '2025-10-01 04:16:19.980578', 'step': 566, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:20.036729', 'step': 566, 'epoch': 1} {'type': 'loss', 'content': 0.31385883688926697, 'timestamp': '2025-10-01 04:16:20.038783', 'step': 567, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:20.105535', 'step': 567, 'epoch': 1} {'type': 'loss', 'content': 0.17124806344509125, 'timestamp': '2025-10-01 04:16:20.110975', 'step': 568, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:20.165280', 'step': 568, 'epoch': 1} {'type': 'loss', 'content': 0.24640893936157227, 'timestamp': '2025-10-01 04:16:20.167301', 'step': 569, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:20.220290', 'step': 569, 'epoch': 1} {'type': 'loss', 'content': 0.1447150707244873, 'timestamp': '2025-10-01 04:16:20.221966', 'step': 570, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:20.275360', 'step': 570, 'epoch': 1} {'type': 'loss', 'content': 0.19153650104999542, 'timestamp': '2025-10-01 04:16:20.277439', 'step': 571, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:20.330699', 'step': 571, 'epoch': 1} {'type': 'loss', 'content': 0.18991906940937042, 'timestamp': '2025-10-01 04:16:20.336637', 'step': 572, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:20.389394', 'step': 572, 'epoch': 1} {'type': 'loss', 'content': 0.13553062081336975, 'timestamp': '2025-10-01 04:16:20.391410', 'step': 573, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:20.444083', 'step': 573, 'epoch': 1} {'type': 'loss', 'content': 0.3125639855861664, 'timestamp': '2025-10-01 04:16:20.446612', 'step': 574, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:20.499397', 'step': 574, 'epoch': 1} {'type': 'loss', 'content': 0.13036105036735535, 'timestamp': '2025-10-01 04:16:20.501459', 'step': 575, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:20.554009', 'step': 575, 'epoch': 1} {'type': 'loss', 'content': 0.1963120549917221, 'timestamp': '2025-10-01 04:16:20.565502', 'step': 576, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:20.618483', 'step': 576, 'epoch': 1} {'type': 'loss', 'content': 0.10022444278001785, 'timestamp': '2025-10-01 04:16:20.620364', 'step': 577, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:20.672812', 'step': 577, 'epoch': 1} {'type': 'loss', 'content': 0.2433609515428543, 'timestamp': '2025-10-01 04:16:20.675309', 'step': 578, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:20.728456', 'step': 578, 'epoch': 1} {'type': 'loss', 'content': 0.2655563950538635, 'timestamp': '2025-10-01 04:16:20.730388', 'step': 579, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:20.782941', 'step': 579, 'epoch': 1} {'type': 'loss', 'content': 0.17421722412109375, 'timestamp': '2025-10-01 04:16:20.788707', 'step': 580, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:20.840916', 'step': 580, 'epoch': 1} {'type': 'loss', 'content': 0.14785075187683105, 'timestamp': '2025-10-01 04:16:20.842844', 'step': 581, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:20.895101', 'step': 581, 'epoch': 1} {'type': 'loss', 'content': 0.12438071519136429, 'timestamp': '2025-10-01 04:16:20.897136', 'step': 582, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:20.951123', 'step': 582, 'epoch': 1} {'type': 'loss', 'content': 0.19922631978988647, 'timestamp': '2025-10-01 04:16:20.953136', 'step': 583, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:21.005854', 'step': 583, 'epoch': 1} {'type': 'loss', 'content': 0.20060153305530548, 'timestamp': '2025-10-01 04:16:21.011575', 'step': 584, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:21.063924', 'step': 584, 'epoch': 1} {'type': 'loss', 'content': 0.14336082339286804, 'timestamp': '2025-10-01 04:16:21.065840', 'step': 585, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-01 04:16:21.122702', 'step': 585, 'epoch': 1} {'type': 'loss', 'content': 0.12524396181106567, 'timestamp': '2025-10-01 04:16:21.124884', 'step': 586, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:21.178327', 'step': 586, 'epoch': 1} {'type': 'loss', 'content': 0.15438660979270935, 'timestamp': '2025-10-01 04:16:21.185756', 'step': 587, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:21.239168', 'step': 587, 'epoch': 1} {'type': 'loss', 'content': 0.18499408662319183, 'timestamp': '2025-10-01 04:16:21.244892', 'step': 588, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:21.297538', 'step': 588, 'epoch': 1} {'type': 'loss', 'content': 0.24607986211776733, 'timestamp': '2025-10-01 04:16:21.299895', 'step': 589, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:21.352712', 'step': 589, 'epoch': 1} {'type': 'loss', 'content': 0.1812414824962616, 'timestamp': '2025-10-01 04:16:21.354493', 'step': 590, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:16:21.408268', 'step': 590, 'epoch': 1} {'type': 'loss', 'content': 0.207962766289711, 'timestamp': '2025-10-01 04:16:21.412168', 'step': 591, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-01 04:16:21.465679', 'step': 591, 'epoch': 1} {'type': 'loss', 'content': 0.1801251620054245, 'timestamp': '2025-10-01 04:16:21.471520', 'step': 592, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:21.528107', 'step': 592, 'epoch': 1} {'type': 'loss', 'content': 0.23560653626918793, 'timestamp': '2025-10-01 04:16:21.530366', 'step': 593, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:21.584175', 'step': 593, 'epoch': 1} {'type': 'loss', 'content': 0.1481844037771225, 'timestamp': '2025-10-01 04:16:21.586567', 'step': 594, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:21.639446', 'step': 594, 'epoch': 1} {'type': 'loss', 'content': 0.1520189493894577, 'timestamp': '2025-10-01 04:16:21.642590', 'step': 595, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:21.695416', 'step': 595, 'epoch': 1} {'type': 'loss', 'content': 0.363468736410141, 'timestamp': '2025-10-01 04:16:21.701020', 'step': 596, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:21.754311', 'step': 596, 'epoch': 1} {'type': 'loss', 'content': 0.18463514745235443, 'timestamp': '2025-10-01 04:16:21.756422', 'step': 597, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:21.809293', 'step': 597, 'epoch': 1} {'type': 'loss', 'content': 0.22240322828292847, 'timestamp': '2025-10-01 04:16:21.811357', 'step': 598, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:21.864272', 'step': 598, 'epoch': 1} {'type': 'loss', 'content': 0.2422625869512558, 'timestamp': '2025-10-01 04:16:21.867108', 'step': 599, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:16:21.921247', 'step': 599, 'epoch': 1} {'type': 'loss', 'content': 0.23422691226005554, 'timestamp': '2025-10-01 04:16:21.928804', 'step': 600, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:21.981624', 'step': 600, 'epoch': 1} {'type': 'loss', 'content': 0.16694596409797668, 'timestamp': '2025-10-01 04:16:21.983731', 'step': 601, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:22.039726', 'step': 601, 'epoch': 1} {'type': 'loss', 'content': 0.29648473858833313, 'timestamp': '2025-10-01 04:16:22.041799', 'step': 602, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:22.095430', 'step': 602, 'epoch': 1} {'type': 'loss', 'content': 0.2508189082145691, 'timestamp': '2025-10-01 04:16:22.097210', 'step': 603, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:22.149846', 'step': 603, 'epoch': 1} {'type': 'loss', 'content': 0.16467677056789398, 'timestamp': '2025-10-01 04:16:22.155604', 'step': 604, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:22.208249', 'step': 604, 'epoch': 1} {'type': 'loss', 'content': 0.14858655631542206, 'timestamp': '2025-10-01 04:16:22.212243', 'step': 605, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:22.265243', 'step': 605, 'epoch': 1} {'type': 'loss', 'content': 0.15501274168491364, 'timestamp': '2025-10-01 04:16:22.270217', 'step': 606, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:22.323332', 'step': 606, 'epoch': 1} {'type': 'loss', 'content': 0.08715201914310455, 'timestamp': '2025-10-01 04:16:22.325641', 'step': 607, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:22.379052', 'step': 607, 'epoch': 1} {'type': 'loss', 'content': 0.16061103343963623, 'timestamp': '2025-10-01 04:16:22.384571', 'step': 608, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:22.439373', 'step': 608, 'epoch': 1} {'type': 'loss', 'content': 0.12044528126716614, 'timestamp': '2025-10-01 04:16:22.450157', 'step': 609, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:22.506130', 'step': 609, 'epoch': 1} {'type': 'loss', 'content': 0.23300157487392426, 'timestamp': '2025-10-01 04:16:22.508211', 'step': 610, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:22.560823', 'step': 610, 'epoch': 1} {'type': 'loss', 'content': 0.2515503764152527, 'timestamp': '2025-10-01 04:16:22.562877', 'step': 611, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:22.615994', 'step': 611, 'epoch': 1} {'type': 'loss', 'content': 0.22755230963230133, 'timestamp': '2025-10-01 04:16:22.621393', 'step': 612, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:22.673862', 'step': 612, 'epoch': 1} {'type': 'loss', 'content': 0.15520106256008148, 'timestamp': '2025-10-01 04:16:22.675948', 'step': 613, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:22.729460', 'step': 613, 'epoch': 1} {'type': 'loss', 'content': 0.23103103041648865, 'timestamp': '2025-10-01 04:16:22.731364', 'step': 614, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:22.784680', 'step': 614, 'epoch': 1} {'type': 'loss', 'content': 0.23400908708572388, 'timestamp': '2025-10-01 04:16:22.787034', 'step': 615, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:22.840452', 'step': 615, 'epoch': 1} {'type': 'loss', 'content': 0.1712394803762436, 'timestamp': '2025-10-01 04:16:22.857804', 'step': 616, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:22.925964', 'step': 616, 'epoch': 1} {'type': 'loss', 'content': 0.22661331295967102, 'timestamp': '2025-10-01 04:16:22.927629', 'step': 617, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:23.009901', 'step': 617, 'epoch': 1} {'type': 'loss', 'content': 0.2613818347454071, 'timestamp': '2025-10-01 04:16:23.013280', 'step': 618, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:23.074865', 'step': 618, 'epoch': 1} {'type': 'loss', 'content': 0.15058331191539764, 'timestamp': '2025-10-01 04:16:23.084430', 'step': 619, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:16:23.150635', 'step': 619, 'epoch': 1} {'type': 'loss', 'content': 0.2486276626586914, 'timestamp': '2025-10-01 04:16:23.160264', 'step': 620, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:23.227095', 'step': 620, 'epoch': 1} {'type': 'loss', 'content': 0.24349284172058105, 'timestamp': '2025-10-01 04:16:23.248035', 'step': 621, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:23.313770', 'step': 621, 'epoch': 1} {'type': 'loss', 'content': 0.24613961577415466, 'timestamp': '2025-10-01 04:16:23.331810', 'step': 622, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:23.391968', 'step': 622, 'epoch': 1} {'type': 'loss', 'content': 0.2544879913330078, 'timestamp': '2025-10-01 04:16:23.401998', 'step': 623, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:23.463249', 'step': 623, 'epoch': 1} {'type': 'loss', 'content': 0.2048039436340332, 'timestamp': '2025-10-01 04:16:23.474767', 'step': 624, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:23.553686', 'step': 624, 'epoch': 1} {'type': 'loss', 'content': 0.1412048637866974, 'timestamp': '2025-10-01 04:16:23.564825', 'step': 625, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:23.636944', 'step': 625, 'epoch': 1} {'type': 'loss', 'content': 0.20992812514305115, 'timestamp': '2025-10-01 04:16:23.644379', 'step': 626, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:23.725118', 'step': 626, 'epoch': 1} {'type': 'loss', 'content': 0.2567172944545746, 'timestamp': '2025-10-01 04:16:23.727344', 'step': 627, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:16:23.793430', 'step': 627, 'epoch': 1} {'type': 'loss', 'content': 0.16410009562969208, 'timestamp': '2025-10-01 04:16:23.799720', 'step': 628, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:23.874858', 'step': 628, 'epoch': 1} {'type': 'loss', 'content': 0.25525882840156555, 'timestamp': '2025-10-01 04:16:23.879703', 'step': 629, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:23.956021', 'step': 629, 'epoch': 1} {'type': 'loss', 'content': 0.1526636779308319, 'timestamp': '2025-10-01 04:16:23.963147', 'step': 630, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:24.020278', 'step': 630, 'epoch': 1} {'type': 'loss', 'content': 0.15255968272686005, 'timestamp': '2025-10-01 04:16:24.023853', 'step': 631, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:24.081504', 'step': 631, 'epoch': 1} {'type': 'loss', 'content': 0.20472918450832367, 'timestamp': '2025-10-01 04:16:24.087223', 'step': 632, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:24.142992', 'step': 632, 'epoch': 1} {'type': 'loss', 'content': 0.14604078233242035, 'timestamp': '2025-10-01 04:16:24.144999', 'step': 633, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:24.200007', 'step': 633, 'epoch': 1} {'type': 'loss', 'content': 0.23653072118759155, 'timestamp': '2025-10-01 04:16:24.202102', 'step': 634, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:24.260306', 'step': 634, 'epoch': 1} {'type': 'loss', 'content': 0.14344041049480438, 'timestamp': '2025-10-01 04:16:24.262423', 'step': 635, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:24.315748', 'step': 635, 'epoch': 1} {'type': 'loss', 'content': 0.17951543629169464, 'timestamp': '2025-10-01 04:16:24.321758', 'step': 636, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:24.374349', 'step': 636, 'epoch': 1} {'type': 'loss', 'content': 0.18032006919384003, 'timestamp': '2025-10-01 04:16:24.376433', 'step': 637, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:24.429860', 'step': 637, 'epoch': 1} {'type': 'loss', 'content': 0.21392498910427094, 'timestamp': '2025-10-01 04:16:24.432853', 'step': 638, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:24.487284', 'step': 638, 'epoch': 1} {'type': 'loss', 'content': 0.20178250968456268, 'timestamp': '2025-10-01 04:16:24.489724', 'step': 639, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:24.542634', 'step': 639, 'epoch': 1} {'type': 'loss', 'content': 0.1554747074842453, 'timestamp': '2025-10-01 04:16:24.548451', 'step': 640, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:24.601643', 'step': 640, 'epoch': 1} {'type': 'loss', 'content': 0.21824213862419128, 'timestamp': '2025-10-01 04:16:24.604609', 'step': 641, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:24.659670', 'step': 641, 'epoch': 1} {'type': 'loss', 'content': 0.1919361799955368, 'timestamp': '2025-10-01 04:16:24.661880', 'step': 642, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:24.720283', 'step': 642, 'epoch': 1} {'type': 'loss', 'content': 0.21068887412548065, 'timestamp': '2025-10-01 04:16:24.722499', 'step': 643, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:24.776470', 'step': 643, 'epoch': 1} {'type': 'loss', 'content': 0.1980298012495041, 'timestamp': '2025-10-01 04:16:24.782297', 'step': 644, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:24.836555', 'step': 644, 'epoch': 1} {'type': 'loss', 'content': 0.2136356085538864, 'timestamp': '2025-10-01 04:16:24.839232', 'step': 645, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:24.892707', 'step': 645, 'epoch': 1} {'type': 'loss', 'content': 0.17033801972866058, 'timestamp': '2025-10-01 04:16:24.894940', 'step': 646, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:24.947988', 'step': 646, 'epoch': 1} {'type': 'loss', 'content': 0.21382269263267517, 'timestamp': '2025-10-01 04:16:24.955174', 'step': 647, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:25.007900', 'step': 647, 'epoch': 1} {'type': 'loss', 'content': 0.24930161237716675, 'timestamp': '2025-10-01 04:16:25.013759', 'step': 648, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:25.066481', 'step': 648, 'epoch': 1} {'type': 'loss', 'content': 0.1846536546945572, 'timestamp': '2025-10-01 04:16:25.068896', 'step': 649, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:25.122128', 'step': 649, 'epoch': 1} {'type': 'loss', 'content': 0.17885038256645203, 'timestamp': '2025-10-01 04:16:25.124009', 'step': 650, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:16:25.176926', 'step': 650, 'epoch': 1} {'type': 'loss', 'content': 0.363503634929657, 'timestamp': '2025-10-01 04:16:25.179331', 'step': 651, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:25.233159', 'step': 651, 'epoch': 1} {'type': 'loss', 'content': 0.09465138614177704, 'timestamp': '2025-10-01 04:16:25.238971', 'step': 652, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:25.294933', 'step': 652, 'epoch': 1} {'type': 'loss', 'content': 0.19438304007053375, 'timestamp': '2025-10-01 04:16:25.296696', 'step': 653, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:25.350955', 'step': 653, 'epoch': 1} {'type': 'loss', 'content': 0.22221451997756958, 'timestamp': '2025-10-01 04:16:25.352944', 'step': 654, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:25.406186', 'step': 654, 'epoch': 1} {'type': 'loss', 'content': 0.21547408401966095, 'timestamp': '2025-10-01 04:16:25.407992', 'step': 655, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:25.461455', 'step': 655, 'epoch': 1} {'type': 'loss', 'content': 0.10093619674444199, 'timestamp': '2025-10-01 04:16:25.467045', 'step': 656, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:25.519630', 'step': 656, 'epoch': 1} {'type': 'loss', 'content': 0.16066612303256989, 'timestamp': '2025-10-01 04:16:25.521485', 'step': 657, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:25.575060', 'step': 657, 'epoch': 1} {'type': 'loss', 'content': 0.14020629227161407, 'timestamp': '2025-10-01 04:16:25.587355', 'step': 658, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:25.640287', 'step': 658, 'epoch': 1} {'type': 'loss', 'content': 0.1960950344800949, 'timestamp': '2025-10-01 04:16:25.643500', 'step': 659, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:25.697927', 'step': 659, 'epoch': 1} {'type': 'loss', 'content': 0.15612423419952393, 'timestamp': '2025-10-01 04:16:25.711327', 'step': 660, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:25.765173', 'step': 660, 'epoch': 1} {'type': 'loss', 'content': 0.19020064175128937, 'timestamp': '2025-10-01 04:16:25.773015', 'step': 661, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:25.827902', 'step': 661, 'epoch': 1} {'type': 'loss', 'content': 0.24516992270946503, 'timestamp': '2025-10-01 04:16:25.830365', 'step': 662, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:25.884156', 'step': 662, 'epoch': 1} {'type': 'loss', 'content': 0.279060423374176, 'timestamp': '2025-10-01 04:16:25.886545', 'step': 663, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:25.939709', 'step': 663, 'epoch': 1} {'type': 'loss', 'content': 0.22064675390720367, 'timestamp': '2025-10-01 04:16:25.945993', 'step': 664, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:25.999164', 'step': 664, 'epoch': 1} {'type': 'loss', 'content': 0.23418861627578735, 'timestamp': '2025-10-01 04:16:26.001691', 'step': 665, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:26.056796', 'step': 665, 'epoch': 1} {'type': 'loss', 'content': 0.13190297782421112, 'timestamp': '2025-10-01 04:16:26.061820', 'step': 666, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:26.124680', 'step': 666, 'epoch': 1} {'type': 'loss', 'content': 0.2107946276664734, 'timestamp': '2025-10-01 04:16:26.135385', 'step': 667, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:26.205605', 'step': 667, 'epoch': 1} {'type': 'loss', 'content': 0.2030247151851654, 'timestamp': '2025-10-01 04:16:26.212270', 'step': 668, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:26.272196', 'step': 668, 'epoch': 1} {'type': 'loss', 'content': 0.36288902163505554, 'timestamp': '2025-10-01 04:16:26.274074', 'step': 669, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:16:26.327946', 'step': 669, 'epoch': 1} {'type': 'loss', 'content': 0.26238352060317993, 'timestamp': '2025-10-01 04:16:26.330353', 'step': 670, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:26.392887', 'step': 670, 'epoch': 1} {'type': 'loss', 'content': 0.1834440529346466, 'timestamp': '2025-10-01 04:16:26.395211', 'step': 671, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:26.448827', 'step': 671, 'epoch': 1} {'type': 'loss', 'content': 0.19237922132015228, 'timestamp': '2025-10-01 04:16:26.454912', 'step': 672, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:26.509006', 'step': 672, 'epoch': 1} {'type': 'loss', 'content': 0.18353702127933502, 'timestamp': '2025-10-01 04:16:26.511551', 'step': 673, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:26.565191', 'step': 673, 'epoch': 1} {'type': 'loss', 'content': 0.1879715621471405, 'timestamp': '2025-10-01 04:16:26.567906', 'step': 674, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:26.621377', 'step': 674, 'epoch': 1} {'type': 'loss', 'content': 0.1807398945093155, 'timestamp': '2025-10-01 04:16:26.623549', 'step': 675, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:16:26.677110', 'step': 675, 'epoch': 1} {'type': 'loss', 'content': 0.16108499467372894, 'timestamp': '2025-10-01 04:16:26.682704', 'step': 676, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:26.736541', 'step': 676, 'epoch': 1} {'type': 'loss', 'content': 0.15935248136520386, 'timestamp': '2025-10-01 04:16:26.738716', 'step': 677, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:26.793840', 'step': 677, 'epoch': 1} {'type': 'loss', 'content': 0.31900033354759216, 'timestamp': '2025-10-01 04:16:26.795927', 'step': 678, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:26.849667', 'step': 678, 'epoch': 1} {'type': 'loss', 'content': 0.19086992740631104, 'timestamp': '2025-10-01 04:16:26.852078', 'step': 679, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:26.905710', 'step': 679, 'epoch': 1} {'type': 'loss', 'content': 0.21249984204769135, 'timestamp': '2025-10-01 04:16:26.911885', 'step': 680, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:26.965174', 'step': 680, 'epoch': 1} {'type': 'loss', 'content': 0.18287789821624756, 'timestamp': '2025-10-01 04:16:26.967505', 'step': 681, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:27.020841', 'step': 681, 'epoch': 1} {'type': 'loss', 'content': 0.19713884592056274, 'timestamp': '2025-10-01 04:16:27.024332', 'step': 682, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:27.078306', 'step': 682, 'epoch': 1} {'type': 'loss', 'content': 0.18559964001178741, 'timestamp': '2025-10-01 04:16:27.092490', 'step': 683, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:27.146818', 'step': 683, 'epoch': 1} {'type': 'loss', 'content': 0.1478031575679779, 'timestamp': '2025-10-01 04:16:27.153046', 'step': 684, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:27.206504', 'step': 684, 'epoch': 1} {'type': 'loss', 'content': 0.2116301953792572, 'timestamp': '2025-10-01 04:16:27.208624', 'step': 685, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:27.268925', 'step': 685, 'epoch': 1} {'type': 'loss', 'content': 0.21678856015205383, 'timestamp': '2025-10-01 04:16:27.271279', 'step': 686, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:27.325708', 'step': 686, 'epoch': 1} {'type': 'loss', 'content': 0.1944548636674881, 'timestamp': '2025-10-01 04:16:27.328186', 'step': 687, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:27.381255', 'step': 687, 'epoch': 1} {'type': 'loss', 'content': 0.13102830946445465, 'timestamp': '2025-10-01 04:16:27.391341', 'step': 688, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:27.445523', 'step': 688, 'epoch': 1} {'type': 'loss', 'content': 0.20031429827213287, 'timestamp': '2025-10-01 04:16:27.449241', 'step': 689, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:27.502840', 'step': 689, 'epoch': 1} {'type': 'loss', 'content': 0.26028481125831604, 'timestamp': '2025-10-01 04:16:27.504916', 'step': 690, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:27.560014', 'step': 690, 'epoch': 1} {'type': 'loss', 'content': 0.1778286248445511, 'timestamp': '2025-10-01 04:16:27.562106', 'step': 691, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:27.615624', 'step': 691, 'epoch': 1} {'type': 'loss', 'content': 0.19671224057674408, 'timestamp': '2025-10-01 04:16:27.621570', 'step': 692, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:27.674184', 'step': 692, 'epoch': 1} {'type': 'loss', 'content': 0.1983032524585724, 'timestamp': '2025-10-01 04:16:27.676257', 'step': 693, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:27.730171', 'step': 693, 'epoch': 1} {'type': 'loss', 'content': 0.2748071849346161, 'timestamp': '2025-10-01 04:16:27.732265', 'step': 694, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:27.785176', 'step': 694, 'epoch': 1} {'type': 'loss', 'content': 0.26852700114250183, 'timestamp': '2025-10-01 04:16:27.791959', 'step': 695, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:27.848799', 'step': 695, 'epoch': 1} {'type': 'loss', 'content': 0.15408672392368317, 'timestamp': '2025-10-01 04:16:27.854358', 'step': 696, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:27.910786', 'step': 696, 'epoch': 1} {'type': 'loss', 'content': 0.2277919352054596, 'timestamp': '2025-10-01 04:16:27.912824', 'step': 697, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:27.965483', 'step': 697, 'epoch': 1} {'type': 'loss', 'content': 0.19375112652778625, 'timestamp': '2025-10-01 04:16:27.967500', 'step': 698, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:28.020239', 'step': 698, 'epoch': 1} {'type': 'loss', 'content': 0.24620619416236877, 'timestamp': '2025-10-01 04:16:28.033435', 'step': 699, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:28.088997', 'step': 699, 'epoch': 1} {'type': 'loss', 'content': 0.10471098870038986, 'timestamp': '2025-10-01 04:16:28.094490', 'step': 700, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:28.154035', 'step': 700, 'epoch': 1} {'type': 'loss', 'content': 0.23981355130672455, 'timestamp': '2025-10-01 04:16:28.156035', 'step': 701, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:28.208734', 'step': 701, 'epoch': 1} {'type': 'loss', 'content': 0.15402287244796753, 'timestamp': '2025-10-01 04:16:28.211016', 'step': 702, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:28.264289', 'step': 702, 'epoch': 1} {'type': 'loss', 'content': 0.22099952399730682, 'timestamp': '2025-10-01 04:16:28.271951', 'step': 703, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:28.328319', 'step': 703, 'epoch': 1} {'type': 'loss', 'content': 0.20100705325603485, 'timestamp': '2025-10-01 04:16:28.334188', 'step': 704, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:28.393634', 'step': 704, 'epoch': 1} {'type': 'loss', 'content': 0.2213112711906433, 'timestamp': '2025-10-01 04:16:28.399546', 'step': 705, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:28.455662', 'step': 705, 'epoch': 1} {'type': 'loss', 'content': 0.21889245510101318, 'timestamp': '2025-10-01 04:16:28.457450', 'step': 706, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:28.510544', 'step': 706, 'epoch': 1} {'type': 'loss', 'content': 0.27992093563079834, 'timestamp': '2025-10-01 04:16:28.512508', 'step': 707, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:28.565349', 'step': 707, 'epoch': 1} {'type': 'loss', 'content': 0.18604609370231628, 'timestamp': '2025-10-01 04:16:28.571047', 'step': 708, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:28.623187', 'step': 708, 'epoch': 1} {'type': 'loss', 'content': 0.1521005630493164, 'timestamp': '2025-10-01 04:16:28.625348', 'step': 709, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:28.681640', 'step': 709, 'epoch': 1} {'type': 'loss', 'content': 0.19312113523483276, 'timestamp': '2025-10-01 04:16:28.683552', 'step': 710, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:28.736242', 'step': 710, 'epoch': 1} {'type': 'loss', 'content': 0.196952223777771, 'timestamp': '2025-10-01 04:16:28.738250', 'step': 711, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:28.793415', 'step': 711, 'epoch': 1} {'type': 'loss', 'content': 0.14299823343753815, 'timestamp': '2025-10-01 04:16:28.799446', 'step': 712, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:28.852856', 'step': 712, 'epoch': 1} {'type': 'loss', 'content': 0.22515173256397247, 'timestamp': '2025-10-01 04:16:28.860917', 'step': 713, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:28.914026', 'step': 713, 'epoch': 1} {'type': 'loss', 'content': 0.1725146323442459, 'timestamp': '2025-10-01 04:16:28.916126', 'step': 714, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:28.969796', 'step': 714, 'epoch': 1} {'type': 'loss', 'content': 0.16749750077724457, 'timestamp': '2025-10-01 04:16:28.975895', 'step': 715, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:29.030957', 'step': 715, 'epoch': 1} {'type': 'loss', 'content': 0.18406662344932556, 'timestamp': '2025-10-01 04:16:29.036786', 'step': 716, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:16:29.089169', 'step': 716, 'epoch': 1} {'type': 'loss', 'content': 0.1431330293416977, 'timestamp': '2025-10-01 04:16:29.091433', 'step': 717, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:29.144727', 'step': 717, 'epoch': 1} {'type': 'loss', 'content': 0.2294846922159195, 'timestamp': '2025-10-01 04:16:29.146887', 'step': 718, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:29.200016', 'step': 718, 'epoch': 1} {'type': 'loss', 'content': 0.2178138941526413, 'timestamp': '2025-10-01 04:16:29.201819', 'step': 719, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:29.254049', 'step': 719, 'epoch': 1} {'type': 'loss', 'content': 0.15089891850948334, 'timestamp': '2025-10-01 04:16:29.260208', 'step': 720, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:29.312737', 'step': 720, 'epoch': 1} {'type': 'loss', 'content': 0.12078775465488434, 'timestamp': '2025-10-01 04:16:29.317827', 'step': 721, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:29.371158', 'step': 721, 'epoch': 1} {'type': 'loss', 'content': 0.2122882753610611, 'timestamp': '2025-10-01 04:16:29.373809', 'step': 722, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:29.430210', 'step': 722, 'epoch': 1} {'type': 'loss', 'content': 0.21831224858760834, 'timestamp': '2025-10-01 04:16:29.432383', 'step': 723, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:16:29.485854', 'step': 723, 'epoch': 1} {'type': 'loss', 'content': 0.09377311915159225, 'timestamp': '2025-10-01 04:16:29.491852', 'step': 724, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:29.543841', 'step': 724, 'epoch': 1} {'type': 'loss', 'content': 0.22642454504966736, 'timestamp': '2025-10-01 04:16:29.545772', 'step': 725, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:29.598018', 'step': 725, 'epoch': 1} {'type': 'loss', 'content': 0.23924115300178528, 'timestamp': '2025-10-01 04:16:29.601299', 'step': 726, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:29.653912', 'step': 726, 'epoch': 1} {'type': 'loss', 'content': 0.1495794653892517, 'timestamp': '2025-10-01 04:16:29.661517', 'step': 727, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:29.718754', 'step': 727, 'epoch': 1} {'type': 'loss', 'content': 0.16917623579502106, 'timestamp': '2025-10-01 04:16:29.725051', 'step': 728, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:29.780853', 'step': 728, 'epoch': 1} {'type': 'loss', 'content': 0.18223942816257477, 'timestamp': '2025-10-01 04:16:29.782710', 'step': 729, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:29.838635', 'step': 729, 'epoch': 1} {'type': 'loss', 'content': 0.17328360676765442, 'timestamp': '2025-10-01 04:16:29.840589', 'step': 730, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:29.893504', 'step': 730, 'epoch': 1} {'type': 'loss', 'content': 0.2331034243106842, 'timestamp': '2025-10-01 04:16:29.895855', 'step': 731, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:29.948273', 'step': 731, 'epoch': 1} {'type': 'loss', 'content': 0.1557484269142151, 'timestamp': '2025-10-01 04:16:29.954277', 'step': 732, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:30.010356', 'step': 732, 'epoch': 1} {'type': 'loss', 'content': 0.19933784008026123, 'timestamp': '2025-10-01 04:16:30.012368', 'step': 733, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:30.065263', 'step': 733, 'epoch': 1} {'type': 'loss', 'content': 0.23728664219379425, 'timestamp': '2025-10-01 04:16:30.067406', 'step': 734, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:30.123596', 'step': 734, 'epoch': 1} {'type': 'loss', 'content': 0.29702574014663696, 'timestamp': '2025-10-01 04:16:30.129178', 'step': 735, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:16:30.189497', 'step': 735, 'epoch': 1} {'type': 'loss', 'content': 0.19916574656963348, 'timestamp': '2025-10-01 04:16:30.202797', 'step': 736, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:16:30.258553', 'step': 736, 'epoch': 1} {'type': 'loss', 'content': 0.17441920936107635, 'timestamp': '2025-10-01 04:16:30.260469', 'step': 737, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:30.318251', 'step': 737, 'epoch': 1} {'type': 'loss', 'content': 0.1440543234348297, 'timestamp': '2025-10-01 04:16:30.320175', 'step': 738, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:30.372626', 'step': 738, 'epoch': 1} {'type': 'loss', 'content': 0.1501755565404892, 'timestamp': '2025-10-01 04:16:30.374661', 'step': 739, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:30.432050', 'step': 739, 'epoch': 1} {'type': 'loss', 'content': 0.11005999892950058, 'timestamp': '2025-10-01 04:16:30.437806', 'step': 740, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:30.490645', 'step': 740, 'epoch': 1} {'type': 'loss', 'content': 0.16872350871562958, 'timestamp': '2025-10-01 04:16:30.492585', 'step': 741, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:30.545180', 'step': 741, 'epoch': 1} {'type': 'loss', 'content': 0.16839252412319183, 'timestamp': '2025-10-01 04:16:30.547134', 'step': 742, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:30.600821', 'step': 742, 'epoch': 1} {'type': 'loss', 'content': 0.2167457491159439, 'timestamp': '2025-10-01 04:16:30.602887', 'step': 743, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:30.656168', 'step': 743, 'epoch': 1} {'type': 'loss', 'content': 0.19151149690151215, 'timestamp': '2025-10-01 04:16:30.667951', 'step': 744, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:30.721250', 'step': 744, 'epoch': 1} {'type': 'loss', 'content': 0.15178506076335907, 'timestamp': '2025-10-01 04:16:30.730169', 'step': 745, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:30.784850', 'step': 745, 'epoch': 1} {'type': 'loss', 'content': 0.2165800929069519, 'timestamp': '2025-10-01 04:16:30.789016', 'step': 746, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:30.843023', 'step': 746, 'epoch': 1} {'type': 'loss', 'content': 0.19421249628067017, 'timestamp': '2025-10-01 04:16:30.845305', 'step': 747, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:30.899325', 'step': 747, 'epoch': 1} {'type': 'loss', 'content': 0.220596045255661, 'timestamp': '2025-10-01 04:16:30.906062', 'step': 748, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:16:30.959140', 'step': 748, 'epoch': 1} {'type': 'loss', 'content': 0.2086450755596161, 'timestamp': '2025-10-01 04:16:30.961192', 'step': 749, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:31.022671', 'step': 749, 'epoch': 1} {'type': 'loss', 'content': 0.279861718416214, 'timestamp': '2025-10-01 04:16:31.032122', 'step': 750, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:31.085875', 'step': 750, 'epoch': 1} {'type': 'loss', 'content': 0.23904094099998474, 'timestamp': '2025-10-01 04:16:31.087687', 'step': 751, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:31.141033', 'step': 751, 'epoch': 1} {'type': 'loss', 'content': 0.2679554522037506, 'timestamp': '2025-10-01 04:16:31.147208', 'step': 752, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:31.200194', 'step': 752, 'epoch': 1} {'type': 'loss', 'content': 0.18831004202365875, 'timestamp': '2025-10-01 04:16:31.202461', 'step': 753, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:31.255868', 'step': 753, 'epoch': 1} {'type': 'loss', 'content': 0.23976728320121765, 'timestamp': '2025-10-01 04:16:31.257821', 'step': 754, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:31.312536', 'step': 754, 'epoch': 1} {'type': 'loss', 'content': 0.17706158757209778, 'timestamp': '2025-10-01 04:16:31.314496', 'step': 755, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:31.367992', 'step': 755, 'epoch': 1} {'type': 'loss', 'content': 0.17365919053554535, 'timestamp': '2025-10-01 04:16:31.373615', 'step': 756, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:31.427593', 'step': 756, 'epoch': 1} {'type': 'loss', 'content': 0.1639547049999237, 'timestamp': '2025-10-01 04:16:31.429884', 'step': 757, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:31.483638', 'step': 757, 'epoch': 1} {'type': 'loss', 'content': 0.3086547553539276, 'timestamp': '2025-10-01 04:16:31.485583', 'step': 758, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:31.540026', 'step': 758, 'epoch': 1} {'type': 'loss', 'content': 0.14684735238552094, 'timestamp': '2025-10-01 04:16:31.542462', 'step': 759, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:31.595722', 'step': 759, 'epoch': 1} {'type': 'loss', 'content': 0.13436684012413025, 'timestamp': '2025-10-01 04:16:31.602132', 'step': 760, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:31.654295', 'step': 760, 'epoch': 1} {'type': 'loss', 'content': 0.14290164411067963, 'timestamp': '2025-10-01 04:16:31.656369', 'step': 761, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:31.709449', 'step': 761, 'epoch': 1} {'type': 'loss', 'content': 0.14889250695705414, 'timestamp': '2025-10-01 04:16:31.712320', 'step': 762, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:31.765362', 'step': 762, 'epoch': 1} {'type': 'loss', 'content': 0.29363399744033813, 'timestamp': '2025-10-01 04:16:31.774621', 'step': 763, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:31.828105', 'step': 763, 'epoch': 1} {'type': 'loss', 'content': 0.16448871791362762, 'timestamp': '2025-10-01 04:16:31.834667', 'step': 764, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:31.887271', 'step': 764, 'epoch': 1} {'type': 'loss', 'content': 0.30326178669929504, 'timestamp': '2025-10-01 04:16:31.889272', 'step': 765, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:31.942687', 'step': 765, 'epoch': 1} {'type': 'loss', 'content': 0.2606669068336487, 'timestamp': '2025-10-01 04:16:31.944599', 'step': 766, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:16:31.997542', 'step': 766, 'epoch': 1} {'type': 'loss', 'content': 0.13241812586784363, 'timestamp': '2025-10-01 04:16:32.006129', 'step': 767, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:32.060731', 'step': 767, 'epoch': 1} {'type': 'loss', 'content': 0.19033583998680115, 'timestamp': '2025-10-01 04:16:32.067350', 'step': 768, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:32.121335', 'step': 768, 'epoch': 1} {'type': 'loss', 'content': 0.2322079837322235, 'timestamp': '2025-10-01 04:16:32.123374', 'step': 769, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:32.180495', 'step': 769, 'epoch': 1} {'type': 'loss', 'content': 0.19254127144813538, 'timestamp': '2025-10-01 04:16:32.182408', 'step': 770, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:32.236027', 'step': 770, 'epoch': 1} {'type': 'loss', 'content': 0.15875063836574554, 'timestamp': '2025-10-01 04:16:32.238036', 'step': 771, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:16:32.294747', 'step': 771, 'epoch': 1} {'type': 'loss', 'content': 0.0886833593249321, 'timestamp': '2025-10-01 04:16:32.300755', 'step': 772, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:32.352750', 'step': 772, 'epoch': 1} {'type': 'loss', 'content': 0.1925983428955078, 'timestamp': '2025-10-01 04:16:32.354675', 'step': 773, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:32.407377', 'step': 773, 'epoch': 1} {'type': 'loss', 'content': 0.23967930674552917, 'timestamp': '2025-10-01 04:16:32.409399', 'step': 774, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:32.462208', 'step': 774, 'epoch': 1} {'type': 'loss', 'content': 0.11372118443250656, 'timestamp': '2025-10-01 04:16:32.464352', 'step': 775, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:32.517611', 'step': 775, 'epoch': 1} {'type': 'loss', 'content': 0.25159209966659546, 'timestamp': '2025-10-01 04:16:32.528421', 'step': 776, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:32.587848', 'step': 776, 'epoch': 1} {'type': 'loss', 'content': 0.22360163927078247, 'timestamp': '2025-10-01 04:16:32.589937', 'step': 777, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:16:32.642674', 'step': 777, 'epoch': 1} {'type': 'loss', 'content': 0.08558185398578644, 'timestamp': '2025-10-01 04:16:32.644837', 'step': 778, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:32.697900', 'step': 778, 'epoch': 1} {'type': 'loss', 'content': 0.2712377607822418, 'timestamp': '2025-10-01 04:16:32.699870', 'step': 779, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:32.752765', 'step': 779, 'epoch': 1} {'type': 'loss', 'content': 0.25822338461875916, 'timestamp': '2025-10-01 04:16:32.758442', 'step': 780, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:32.810549', 'step': 780, 'epoch': 1} {'type': 'loss', 'content': 0.14277862012386322, 'timestamp': '2025-10-01 04:16:32.812500', 'step': 781, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:32.864851', 'step': 781, 'epoch': 1} {'type': 'loss', 'content': 0.15719974040985107, 'timestamp': '2025-10-01 04:16:32.867014', 'step': 782, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:32.919991', 'step': 782, 'epoch': 1} {'type': 'loss', 'content': 0.21914102137088776, 'timestamp': '2025-10-01 04:16:32.922282', 'step': 783, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:16:32.975574', 'step': 783, 'epoch': 1} {'type': 'loss', 'content': 0.20286566019058228, 'timestamp': '2025-10-01 04:16:32.981095', 'step': 784, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:33.034017', 'step': 784, 'epoch': 1} {'type': 'loss', 'content': 0.20609307289123535, 'timestamp': '2025-10-01 04:16:33.036310', 'step': 785, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:33.099063', 'step': 785, 'epoch': 1} {'type': 'loss', 'content': 0.3131110966205597, 'timestamp': '2025-10-01 04:16:33.100854', 'step': 786, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:33.153416', 'step': 786, 'epoch': 1} {'type': 'loss', 'content': 0.22738316655158997, 'timestamp': '2025-10-01 04:16:33.155401', 'step': 787, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:33.208786', 'step': 787, 'epoch': 1} {'type': 'loss', 'content': 0.14070484042167664, 'timestamp': '2025-10-01 04:16:33.214416', 'step': 788, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:33.267174', 'step': 788, 'epoch': 1} {'type': 'loss', 'content': 0.17285777628421783, 'timestamp': '2025-10-01 04:16:33.269778', 'step': 789, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:33.323196', 'step': 789, 'epoch': 1} {'type': 'loss', 'content': 0.31077098846435547, 'timestamp': '2025-10-01 04:16:33.325249', 'step': 790, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:33.380593', 'step': 790, 'epoch': 1} {'type': 'loss', 'content': 0.21004171669483185, 'timestamp': '2025-10-01 04:16:33.382614', 'step': 791, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:33.435488', 'step': 791, 'epoch': 1} {'type': 'loss', 'content': 0.17935940623283386, 'timestamp': '2025-10-01 04:16:33.441284', 'step': 792, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:33.494439', 'step': 792, 'epoch': 1} {'type': 'loss', 'content': 0.16554374992847443, 'timestamp': '2025-10-01 04:16:33.496796', 'step': 793, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:33.549760', 'step': 793, 'epoch': 1} {'type': 'loss', 'content': 0.28257283568382263, 'timestamp': '2025-10-01 04:16:33.552485', 'step': 794, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:33.607815', 'step': 794, 'epoch': 1} {'type': 'loss', 'content': 0.17100150883197784, 'timestamp': '2025-10-01 04:16:33.612490', 'step': 795, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:33.670210', 'step': 795, 'epoch': 1} {'type': 'loss', 'content': 0.15147554874420166, 'timestamp': '2025-10-01 04:16:33.675841', 'step': 796, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:33.728245', 'step': 796, 'epoch': 1} {'type': 'loss', 'content': 0.1248013973236084, 'timestamp': '2025-10-01 04:16:33.730237', 'step': 797, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:33.783276', 'step': 797, 'epoch': 1} {'type': 'loss', 'content': 0.13282550871372223, 'timestamp': '2025-10-01 04:16:33.798877', 'step': 798, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:33.865889', 'step': 798, 'epoch': 1} {'type': 'loss', 'content': 0.24408000707626343, 'timestamp': '2025-10-01 04:16:33.872901', 'step': 799, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:33.926155', 'step': 799, 'epoch': 1} {'type': 'loss', 'content': 0.2875482141971588, 'timestamp': '2025-10-01 04:16:33.932085', 'step': 800, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:33.986789', 'step': 800, 'epoch': 1} {'type': 'loss', 'content': 0.199947327375412, 'timestamp': '2025-10-01 04:16:33.988814', 'step': 801, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:34.042895', 'step': 801, 'epoch': 1} {'type': 'loss', 'content': 0.1507151871919632, 'timestamp': '2025-10-01 04:16:34.044966', 'step': 802, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:34.099397', 'step': 802, 'epoch': 1} {'type': 'loss', 'content': 0.23566459119319916, 'timestamp': '2025-10-01 04:16:34.101437', 'step': 803, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:34.155044', 'step': 803, 'epoch': 1} {'type': 'loss', 'content': 0.1748412549495697, 'timestamp': '2025-10-01 04:16:34.161157', 'step': 804, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:34.214667', 'step': 804, 'epoch': 1} {'type': 'loss', 'content': 0.12210222333669662, 'timestamp': '2025-10-01 04:16:34.216923', 'step': 805, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:34.271151', 'step': 805, 'epoch': 1} {'type': 'loss', 'content': 0.20751310884952545, 'timestamp': '2025-10-01 04:16:34.273399', 'step': 806, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:34.327878', 'step': 806, 'epoch': 1} {'type': 'loss', 'content': 0.28492647409439087, 'timestamp': '2025-10-01 04:16:34.329899', 'step': 807, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:34.384811', 'step': 807, 'epoch': 1} {'type': 'loss', 'content': 0.18792003393173218, 'timestamp': '2025-10-01 04:16:34.390947', 'step': 808, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:34.444644', 'step': 808, 'epoch': 1} {'type': 'loss', 'content': 0.21489672362804413, 'timestamp': '2025-10-01 04:16:34.446896', 'step': 809, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:34.501415', 'step': 809, 'epoch': 1} {'type': 'loss', 'content': 0.19334468245506287, 'timestamp': '2025-10-01 04:16:34.503713', 'step': 810, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:34.557901', 'step': 810, 'epoch': 1} {'type': 'loss', 'content': 0.2434062510728836, 'timestamp': '2025-10-01 04:16:34.559779', 'step': 811, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:34.613012', 'step': 811, 'epoch': 1} {'type': 'loss', 'content': 0.19145691394805908, 'timestamp': '2025-10-01 04:16:34.619284', 'step': 812, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:34.672886', 'step': 812, 'epoch': 1} {'type': 'loss', 'content': 0.1602669507265091, 'timestamp': '2025-10-01 04:16:34.675019', 'step': 813, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:34.730834', 'step': 813, 'epoch': 1} {'type': 'loss', 'content': 0.2003052681684494, 'timestamp': '2025-10-01 04:16:34.732666', 'step': 814, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:16:34.786004', 'step': 814, 'epoch': 1} {'type': 'loss', 'content': 0.20118799805641174, 'timestamp': '2025-10-01 04:16:34.788939', 'step': 815, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:34.842002', 'step': 815, 'epoch': 1} {'type': 'loss', 'content': 0.19595296680927277, 'timestamp': '2025-10-01 04:16:34.847529', 'step': 816, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:34.899702', 'step': 816, 'epoch': 1} {'type': 'loss', 'content': 0.20751707255840302, 'timestamp': '2025-10-01 04:16:34.902011', 'step': 817, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:34.955656', 'step': 817, 'epoch': 1} {'type': 'loss', 'content': 0.1797197461128235, 'timestamp': '2025-10-01 04:16:34.958051', 'step': 818, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:35.011759', 'step': 818, 'epoch': 1} {'type': 'loss', 'content': 0.18667396903038025, 'timestamp': '2025-10-01 04:16:35.014119', 'step': 819, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:35.067779', 'step': 819, 'epoch': 1} {'type': 'loss', 'content': 0.16966193914413452, 'timestamp': '2025-10-01 04:16:35.074227', 'step': 820, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:35.127482', 'step': 820, 'epoch': 1} {'type': 'loss', 'content': 0.26714372634887695, 'timestamp': '2025-10-01 04:16:35.129798', 'step': 821, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:35.184042', 'step': 821, 'epoch': 1} {'type': 'loss', 'content': 0.1896323263645172, 'timestamp': '2025-10-01 04:16:35.186566', 'step': 822, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:35.240773', 'step': 822, 'epoch': 1} {'type': 'loss', 'content': 0.15752479434013367, 'timestamp': '2025-10-01 04:16:35.243188', 'step': 823, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:35.298834', 'step': 823, 'epoch': 1} {'type': 'loss', 'content': 0.14606714248657227, 'timestamp': '2025-10-01 04:16:35.305093', 'step': 824, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:35.358373', 'step': 824, 'epoch': 1} {'type': 'loss', 'content': 0.25343307852745056, 'timestamp': '2025-10-01 04:16:35.360738', 'step': 825, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:35.415659', 'step': 825, 'epoch': 1} {'type': 'loss', 'content': 0.13319054245948792, 'timestamp': '2025-10-01 04:16:35.418099', 'step': 826, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:35.472213', 'step': 826, 'epoch': 1} {'type': 'loss', 'content': 0.31661921739578247, 'timestamp': '2025-10-01 04:16:35.474224', 'step': 827, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:35.527865', 'step': 827, 'epoch': 1} {'type': 'loss', 'content': 0.23258043825626373, 'timestamp': '2025-10-01 04:16:35.536830', 'step': 828, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:35.590073', 'step': 828, 'epoch': 1} {'type': 'loss', 'content': 0.12971441447734833, 'timestamp': '2025-10-01 04:16:35.592135', 'step': 829, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:35.645736', 'step': 829, 'epoch': 1} {'type': 'loss', 'content': 0.14837267994880676, 'timestamp': '2025-10-01 04:16:35.647798', 'step': 830, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:35.703168', 'step': 830, 'epoch': 1} {'type': 'loss', 'content': 0.3411409258842468, 'timestamp': '2025-10-01 04:16:35.705612', 'step': 831, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:35.759490', 'step': 831, 'epoch': 1} {'type': 'loss', 'content': 0.17949321866035461, 'timestamp': '2025-10-01 04:16:35.765852', 'step': 832, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:35.818824', 'step': 832, 'epoch': 1} {'type': 'loss', 'content': 0.14296124875545502, 'timestamp': '2025-10-01 04:16:35.821420', 'step': 833, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:35.875096', 'step': 833, 'epoch': 1} {'type': 'loss', 'content': 0.1528983861207962, 'timestamp': '2025-10-01 04:16:35.877620', 'step': 834, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:35.932130', 'step': 834, 'epoch': 1} {'type': 'loss', 'content': 0.14341247081756592, 'timestamp': '2025-10-01 04:16:35.934473', 'step': 835, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:16:35.988440', 'step': 835, 'epoch': 1} {'type': 'loss', 'content': 0.16536933183670044, 'timestamp': '2025-10-01 04:16:35.994487', 'step': 836, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:36.047402', 'step': 836, 'epoch': 1} {'type': 'loss', 'content': 0.19361086189746857, 'timestamp': '2025-10-01 04:16:36.053292', 'step': 837, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:36.107166', 'step': 837, 'epoch': 1} {'type': 'loss', 'content': 0.21421867609024048, 'timestamp': '2025-10-01 04:16:36.109013', 'step': 838, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:36.165696', 'step': 838, 'epoch': 1} {'type': 'loss', 'content': 0.3134433627128601, 'timestamp': '2025-10-01 04:16:36.167571', 'step': 839, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:36.220367', 'step': 839, 'epoch': 1} {'type': 'loss', 'content': 0.2086295485496521, 'timestamp': '2025-10-01 04:16:36.226408', 'step': 840, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:36.279447', 'step': 840, 'epoch': 1} {'type': 'loss', 'content': 0.17537619173526764, 'timestamp': '2025-10-01 04:16:36.281569', 'step': 841, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:36.336456', 'step': 841, 'epoch': 1} {'type': 'loss', 'content': 0.13139604032039642, 'timestamp': '2025-10-01 04:16:36.338454', 'step': 842, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:36.392331', 'step': 842, 'epoch': 1} {'type': 'loss', 'content': 0.2322668880224228, 'timestamp': '2025-10-01 04:16:36.394288', 'step': 843, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:36.454570', 'step': 843, 'epoch': 1} {'type': 'loss', 'content': 0.25689277052879333, 'timestamp': '2025-10-01 04:16:36.460704', 'step': 844, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:36.515338', 'step': 844, 'epoch': 1} {'type': 'loss', 'content': 0.19027842581272125, 'timestamp': '2025-10-01 04:16:36.517347', 'step': 845, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:36.573068', 'step': 845, 'epoch': 1} {'type': 'loss', 'content': 0.2256782054901123, 'timestamp': '2025-10-01 04:16:36.574961', 'step': 846, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:36.630935', 'step': 846, 'epoch': 1} {'type': 'loss', 'content': 0.18636389076709747, 'timestamp': '2025-10-01 04:16:36.633040', 'step': 847, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:16:36.687889', 'step': 847, 'epoch': 1} {'type': 'loss', 'content': 0.24588413536548615, 'timestamp': '2025-10-01 04:16:36.694663', 'step': 848, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:36.748124', 'step': 848, 'epoch': 1} {'type': 'loss', 'content': 0.1489957571029663, 'timestamp': '2025-10-01 04:16:36.750379', 'step': 849, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:36.803695', 'step': 849, 'epoch': 1} {'type': 'loss', 'content': 0.16795195639133453, 'timestamp': '2025-10-01 04:16:36.805771', 'step': 850, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:36.859234', 'step': 850, 'epoch': 1} {'type': 'loss', 'content': 0.24173200130462646, 'timestamp': '2025-10-01 04:16:36.863706', 'step': 851, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:36.917179', 'step': 851, 'epoch': 1} {'type': 'loss', 'content': 0.1594408005475998, 'timestamp': '2025-10-01 04:16:36.923104', 'step': 852, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:36.975916', 'step': 852, 'epoch': 1} {'type': 'loss', 'content': 0.18057985603809357, 'timestamp': '2025-10-01 04:16:36.978149', 'step': 853, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:37.031256', 'step': 853, 'epoch': 1} {'type': 'loss', 'content': 0.2131732702255249, 'timestamp': '2025-10-01 04:16:37.033421', 'step': 854, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:37.088239', 'step': 854, 'epoch': 1} {'type': 'loss', 'content': 0.17105409502983093, 'timestamp': '2025-10-01 04:16:37.090287', 'step': 855, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:37.144929', 'step': 855, 'epoch': 1} {'type': 'loss', 'content': 0.16306394338607788, 'timestamp': '2025-10-01 04:16:37.151215', 'step': 856, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:37.204627', 'step': 856, 'epoch': 1} {'type': 'loss', 'content': 0.3069280683994293, 'timestamp': '2025-10-01 04:16:37.206672', 'step': 857, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:37.261644', 'step': 857, 'epoch': 1} {'type': 'loss', 'content': 0.21344858407974243, 'timestamp': '2025-10-01 04:16:37.263651', 'step': 858, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:37.318585', 'step': 858, 'epoch': 1} {'type': 'loss', 'content': 0.1302185356616974, 'timestamp': '2025-10-01 04:16:37.320596', 'step': 859, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:16:37.375180', 'step': 859, 'epoch': 1} {'type': 'loss', 'content': 0.18102799355983734, 'timestamp': '2025-10-01 04:16:37.381549', 'step': 860, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:37.435596', 'step': 860, 'epoch': 1} {'type': 'loss', 'content': 0.17669491469860077, 'timestamp': '2025-10-01 04:16:37.437574', 'step': 861, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:37.491750', 'step': 861, 'epoch': 1} {'type': 'loss', 'content': 0.08218399435281754, 'timestamp': '2025-10-01 04:16:37.493782', 'step': 862, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:37.548637', 'step': 862, 'epoch': 1} {'type': 'loss', 'content': 0.28218716382980347, 'timestamp': '2025-10-01 04:16:37.550766', 'step': 863, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:16:37.605281', 'step': 863, 'epoch': 1} {'type': 'loss', 'content': 0.16063615679740906, 'timestamp': '2025-10-01 04:16:37.611596', 'step': 864, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:37.665185', 'step': 864, 'epoch': 1} {'type': 'loss', 'content': 0.19078956544399261, 'timestamp': '2025-10-01 04:16:37.667225', 'step': 865, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:37.720814', 'step': 865, 'epoch': 1} {'type': 'loss', 'content': 0.21176297962665558, 'timestamp': '2025-10-01 04:16:37.722829', 'step': 866, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:37.776141', 'step': 866, 'epoch': 1} {'type': 'loss', 'content': 0.1971389353275299, 'timestamp': '2025-10-01 04:16:37.778131', 'step': 867, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:37.831160', 'step': 867, 'epoch': 1} {'type': 'loss', 'content': 0.24370619654655457, 'timestamp': '2025-10-01 04:16:37.837801', 'step': 868, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:37.890802', 'step': 868, 'epoch': 1} {'type': 'loss', 'content': 0.14149178564548492, 'timestamp': '2025-10-01 04:16:37.892818', 'step': 869, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:37.946096', 'step': 869, 'epoch': 1} {'type': 'loss', 'content': 0.220608189702034, 'timestamp': '2025-10-01 04:16:37.947875', 'step': 870, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:38.000660', 'step': 870, 'epoch': 1} {'type': 'loss', 'content': 0.10597579926252365, 'timestamp': '2025-10-01 04:16:38.002484', 'step': 871, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:38.055369', 'step': 871, 'epoch': 1} {'type': 'loss', 'content': 0.16814537346363068, 'timestamp': '2025-10-01 04:16:38.061150', 'step': 872, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:38.121505', 'step': 872, 'epoch': 1} {'type': 'loss', 'content': 0.2143223136663437, 'timestamp': '2025-10-01 04:16:38.124070', 'step': 873, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:38.182968', 'step': 873, 'epoch': 1} {'type': 'loss', 'content': 0.2789636254310608, 'timestamp': '2025-10-01 04:16:38.185223', 'step': 874, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:38.244745', 'step': 874, 'epoch': 1} {'type': 'loss', 'content': 0.21300998330116272, 'timestamp': '2025-10-01 04:16:38.246579', 'step': 875, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:38.302884', 'step': 875, 'epoch': 1} {'type': 'loss', 'content': 0.1651725023984909, 'timestamp': '2025-10-01 04:16:38.308957', 'step': 876, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:38.361293', 'step': 876, 'epoch': 1} {'type': 'loss', 'content': 0.17560625076293945, 'timestamp': '2025-10-01 04:16:38.363601', 'step': 877, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:38.416636', 'step': 877, 'epoch': 1} {'type': 'loss', 'content': 0.15196549892425537, 'timestamp': '2025-10-01 04:16:38.418822', 'step': 878, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:38.472465', 'step': 878, 'epoch': 1} {'type': 'loss', 'content': 0.19065944850444794, 'timestamp': '2025-10-01 04:16:38.474565', 'step': 879, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:38.528947', 'step': 879, 'epoch': 1} {'type': 'loss', 'content': 0.1569271981716156, 'timestamp': '2025-10-01 04:16:38.535028', 'step': 880, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:38.588109', 'step': 880, 'epoch': 1} {'type': 'loss', 'content': 0.17413818836212158, 'timestamp': '2025-10-01 04:16:38.590185', 'step': 881, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:38.643768', 'step': 881, 'epoch': 1} {'type': 'loss', 'content': 0.25264638662338257, 'timestamp': '2025-10-01 04:16:38.645884', 'step': 882, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:38.699519', 'step': 882, 'epoch': 1} {'type': 'loss', 'content': 0.1799539178609848, 'timestamp': '2025-10-01 04:16:38.701542', 'step': 883, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:38.754840', 'step': 883, 'epoch': 1} {'type': 'loss', 'content': 0.21786822378635406, 'timestamp': '2025-10-01 04:16:38.761263', 'step': 884, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:38.814203', 'step': 884, 'epoch': 1} {'type': 'loss', 'content': 0.13956591486930847, 'timestamp': '2025-10-01 04:16:38.816436', 'step': 885, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:38.870032', 'step': 885, 'epoch': 1} {'type': 'loss', 'content': 0.34737950563430786, 'timestamp': '2025-10-01 04:16:38.872064', 'step': 886, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:16:38.926188', 'step': 886, 'epoch': 1} {'type': 'loss', 'content': 0.23507484793663025, 'timestamp': '2025-10-01 04:16:38.928426', 'step': 887, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:38.984628', 'step': 887, 'epoch': 1} {'type': 'loss', 'content': 0.27245765924453735, 'timestamp': '2025-10-01 04:16:38.990365', 'step': 888, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:39.042533', 'step': 888, 'epoch': 1} {'type': 'loss', 'content': 0.24681559205055237, 'timestamp': '2025-10-01 04:16:39.044513', 'step': 889, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:39.097764', 'step': 889, 'epoch': 1} {'type': 'loss', 'content': 0.11121705919504166, 'timestamp': '2025-10-01 04:16:39.099935', 'step': 890, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:39.152458', 'step': 890, 'epoch': 1} {'type': 'loss', 'content': 0.22304169833660126, 'timestamp': '2025-10-01 04:16:39.156857', 'step': 891, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:39.209264', 'step': 891, 'epoch': 1} {'type': 'loss', 'content': 0.15863797068595886, 'timestamp': '2025-10-01 04:16:39.215142', 'step': 892, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:39.267187', 'step': 892, 'epoch': 1} {'type': 'loss', 'content': 0.13365401327610016, 'timestamp': '2025-10-01 04:16:39.269311', 'step': 893, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:39.321287', 'step': 893, 'epoch': 1} {'type': 'loss', 'content': 0.20267225801944733, 'timestamp': '2025-10-01 04:16:39.323324', 'step': 894, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:39.376053', 'step': 894, 'epoch': 1} {'type': 'loss', 'content': 0.11959799379110336, 'timestamp': '2025-10-01 04:16:39.378389', 'step': 895, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:39.431078', 'step': 895, 'epoch': 1} {'type': 'loss', 'content': 0.2182549089193344, 'timestamp': '2025-10-01 04:16:39.436403', 'step': 896, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:16:39.488184', 'step': 896, 'epoch': 1} {'type': 'loss', 'content': 0.16858859360218048, 'timestamp': '2025-10-01 04:16:39.490103', 'step': 897, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:39.541848', 'step': 897, 'epoch': 1} {'type': 'loss', 'content': 0.16923844814300537, 'timestamp': '2025-10-01 04:16:39.543820', 'step': 898, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:39.596029', 'step': 898, 'epoch': 1} {'type': 'loss', 'content': 0.23379191756248474, 'timestamp': '2025-10-01 04:16:39.598260', 'step': 899, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:39.651302', 'step': 899, 'epoch': 1} {'type': 'loss', 'content': 0.3516694903373718, 'timestamp': '2025-10-01 04:16:39.657039', 'step': 900, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:39.718591', 'step': 900, 'epoch': 1} {'type': 'loss', 'content': 0.142965629696846, 'timestamp': '2025-10-01 04:16:39.720568', 'step': 901, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:39.775405', 'step': 901, 'epoch': 1} {'type': 'loss', 'content': 0.15272600948810577, 'timestamp': '2025-10-01 04:16:39.777338', 'step': 902, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:39.830047', 'step': 902, 'epoch': 1} {'type': 'loss', 'content': 0.18299928307533264, 'timestamp': '2025-10-01 04:16:39.832160', 'step': 903, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:39.884544', 'step': 903, 'epoch': 1} {'type': 'loss', 'content': 0.22305947542190552, 'timestamp': '2025-10-01 04:16:39.890160', 'step': 904, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:16:39.946550', 'step': 904, 'epoch': 1} {'type': 'loss', 'content': 0.17704680562019348, 'timestamp': '2025-10-01 04:16:39.948484', 'step': 905, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:40.001761', 'step': 905, 'epoch': 1} {'type': 'loss', 'content': 0.21994216740131378, 'timestamp': '2025-10-01 04:16:40.003960', 'step': 906, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:40.056681', 'step': 906, 'epoch': 1} {'type': 'loss', 'content': 0.1482366919517517, 'timestamp': '2025-10-01 04:16:40.058964', 'step': 907, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:40.111575', 'step': 907, 'epoch': 1} {'type': 'loss', 'content': 0.20758040249347687, 'timestamp': '2025-10-01 04:16:40.117694', 'step': 908, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:16:40.169755', 'step': 908, 'epoch': 1} {'type': 'loss', 'content': 0.2746098041534424, 'timestamp': '2025-10-01 04:16:40.171738', 'step': 909, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:40.224524', 'step': 909, 'epoch': 1} {'type': 'loss', 'content': 0.17970027029514313, 'timestamp': '2025-10-01 04:16:40.226568', 'step': 910, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:40.281432', 'step': 910, 'epoch': 1} {'type': 'loss', 'content': 0.21886296570301056, 'timestamp': '2025-10-01 04:16:40.283597', 'step': 911, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:40.335726', 'step': 911, 'epoch': 1} {'type': 'loss', 'content': 0.20866355299949646, 'timestamp': '2025-10-01 04:16:40.341268', 'step': 912, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:40.393201', 'step': 912, 'epoch': 1} {'type': 'loss', 'content': 0.14130115509033203, 'timestamp': '2025-10-01 04:16:40.395130', 'step': 913, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:40.447691', 'step': 913, 'epoch': 1} {'type': 'loss', 'content': 0.10802749544382095, 'timestamp': '2025-10-01 04:16:40.449563', 'step': 914, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-01 04:16:40.502786', 'step': 914, 'epoch': 1} {'type': 'loss', 'content': 0.24123401939868927, 'timestamp': '2025-10-01 04:16:40.504807', 'step': 915, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:40.557553', 'step': 915, 'epoch': 1} {'type': 'loss', 'content': 0.18706290423870087, 'timestamp': '2025-10-01 04:16:40.563880', 'step': 916, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:40.616097', 'step': 916, 'epoch': 1} {'type': 'loss', 'content': 0.3026745915412903, 'timestamp': '2025-10-01 04:16:40.618039', 'step': 917, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:40.671370', 'step': 917, 'epoch': 1} {'type': 'loss', 'content': 0.17380735278129578, 'timestamp': '2025-10-01 04:16:40.673262', 'step': 918, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:40.726437', 'step': 918, 'epoch': 1} {'type': 'loss', 'content': 0.3257386088371277, 'timestamp': '2025-10-01 04:16:40.728276', 'step': 919, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:40.782804', 'step': 919, 'epoch': 1} {'type': 'loss', 'content': 0.13617601990699768, 'timestamp': '2025-10-01 04:16:40.789105', 'step': 920, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:40.842876', 'step': 920, 'epoch': 1} {'type': 'loss', 'content': 0.2579224407672882, 'timestamp': '2025-10-01 04:16:40.845205', 'step': 921, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:40.898432', 'step': 921, 'epoch': 1} {'type': 'loss', 'content': 0.16475749015808105, 'timestamp': '2025-10-01 04:16:40.900565', 'step': 922, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:40.953774', 'step': 922, 'epoch': 1} {'type': 'loss', 'content': 0.2064191997051239, 'timestamp': '2025-10-01 04:16:40.955821', 'step': 923, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:41.008587', 'step': 923, 'epoch': 1} {'type': 'loss', 'content': 0.26918578147888184, 'timestamp': '2025-10-01 04:16:41.014706', 'step': 924, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:41.070905', 'step': 924, 'epoch': 1} {'type': 'loss', 'content': 0.13852658867835999, 'timestamp': '2025-10-01 04:16:41.072862', 'step': 925, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:41.125854', 'step': 925, 'epoch': 1} {'type': 'loss', 'content': 0.2524332106113434, 'timestamp': '2025-10-01 04:16:41.127931', 'step': 926, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:41.183277', 'step': 926, 'epoch': 1} {'type': 'loss', 'content': 0.2841121256351471, 'timestamp': '2025-10-01 04:16:41.185405', 'step': 927, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:41.237603', 'step': 927, 'epoch': 1} {'type': 'loss', 'content': 0.162920281291008, 'timestamp': '2025-10-01 04:16:41.243165', 'step': 928, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:41.295519', 'step': 928, 'epoch': 1} {'type': 'loss', 'content': 0.15452367067337036, 'timestamp': '2025-10-01 04:16:41.298148', 'step': 929, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:41.350909', 'step': 929, 'epoch': 1} {'type': 'loss', 'content': 0.17834793031215668, 'timestamp': '2025-10-01 04:16:41.353189', 'step': 930, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:41.406286', 'step': 930, 'epoch': 1} {'type': 'loss', 'content': 0.1677916944026947, 'timestamp': '2025-10-01 04:16:41.408352', 'step': 931, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:41.460978', 'step': 931, 'epoch': 1} {'type': 'loss', 'content': 0.329245924949646, 'timestamp': '2025-10-01 04:16:41.467150', 'step': 932, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-01 04:16:54.919000', 'step': 932, 'epoch': 1} {'type': 'pplx', 'content': 9518.3314356812, 'timestamp': '2025-10-01 04:16:54.921787', 'step': 932, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:54.975262', 'step': 932, 'epoch': 1} {'type': 'loss', 'content': 0.11585446447134018, 'timestamp': '2025-10-01 04:16:54.977271', 'step': 933, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:55.030803', 'step': 933, 'epoch': 1} {'type': 'loss', 'content': 0.13500745594501495, 'timestamp': '2025-10-01 04:16:55.032596', 'step': 934, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:55.085122', 'step': 934, 'epoch': 1} {'type': 'loss', 'content': 0.273563414812088, 'timestamp': '2025-10-01 04:16:55.087791', 'step': 935, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:55.146993', 'step': 935, 'epoch': 1} {'type': 'loss', 'content': 0.14200057089328766, 'timestamp': '2025-10-01 04:16:55.152691', 'step': 936, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:55.204623', 'step': 936, 'epoch': 1} {'type': 'loss', 'content': 0.2443327009677887, 'timestamp': '2025-10-01 04:16:55.214063', 'step': 937, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:55.265962', 'step': 937, 'epoch': 1} {'type': 'loss', 'content': 0.29032814502716064, 'timestamp': '2025-10-01 04:16:55.267897', 'step': 938, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:16:55.321142', 'step': 938, 'epoch': 1} {'type': 'loss', 'content': 0.23430617153644562, 'timestamp': '2025-10-01 04:16:55.323278', 'step': 939, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:55.375643', 'step': 939, 'epoch': 1} {'type': 'loss', 'content': 0.22686436772346497, 'timestamp': '2025-10-01 04:16:55.381381', 'step': 940, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:55.432892', 'step': 940, 'epoch': 1} {'type': 'loss', 'content': 0.1546429991722107, 'timestamp': '2025-10-01 04:16:55.435086', 'step': 941, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:55.487421', 'step': 941, 'epoch': 1} {'type': 'loss', 'content': 0.24170386791229248, 'timestamp': '2025-10-01 04:16:55.489660', 'step': 942, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:55.547807', 'step': 942, 'epoch': 1} {'type': 'loss', 'content': 0.21429137885570526, 'timestamp': '2025-10-01 04:16:55.549988', 'step': 943, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:55.602965', 'step': 943, 'epoch': 1} {'type': 'loss', 'content': 0.12851941585540771, 'timestamp': '2025-10-01 04:16:55.608835', 'step': 944, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:55.660595', 'step': 944, 'epoch': 1} {'type': 'loss', 'content': 0.19351136684417725, 'timestamp': '2025-10-01 04:16:55.662895', 'step': 945, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:55.722081', 'step': 945, 'epoch': 1} {'type': 'loss', 'content': 0.11413364112377167, 'timestamp': '2025-10-01 04:16:55.723978', 'step': 946, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:55.777050', 'step': 946, 'epoch': 1} {'type': 'loss', 'content': 0.23459304869174957, 'timestamp': '2025-10-01 04:16:55.779907', 'step': 947, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:55.832460', 'step': 947, 'epoch': 1} {'type': 'loss', 'content': 0.22450251877307892, 'timestamp': '2025-10-01 04:16:55.838260', 'step': 948, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:16:55.896542', 'step': 948, 'epoch': 1} {'type': 'loss', 'content': 0.21725353598594666, 'timestamp': '2025-10-01 04:16:55.898769', 'step': 949, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:55.951657', 'step': 949, 'epoch': 1} {'type': 'loss', 'content': 0.16852687299251556, 'timestamp': '2025-10-01 04:16:55.953790', 'step': 950, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:56.019200', 'step': 950, 'epoch': 1} {'type': 'loss', 'content': 0.18302583694458008, 'timestamp': '2025-10-01 04:16:56.021215', 'step': 951, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:56.074773', 'step': 951, 'epoch': 1} {'type': 'loss', 'content': 0.19625502824783325, 'timestamp': '2025-10-01 04:16:56.089849', 'step': 952, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:56.142942', 'step': 952, 'epoch': 1} {'type': 'loss', 'content': 0.10675748437643051, 'timestamp': '2025-10-01 04:16:56.144784', 'step': 953, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:16:56.197427', 'step': 953, 'epoch': 1} {'type': 'loss', 'content': 0.14053022861480713, 'timestamp': '2025-10-01 04:16:56.199678', 'step': 954, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:56.252850', 'step': 954, 'epoch': 1} {'type': 'loss', 'content': 0.13013269007205963, 'timestamp': '2025-10-01 04:16:56.255528', 'step': 955, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:56.309109', 'step': 955, 'epoch': 1} {'type': 'loss', 'content': 0.12885648012161255, 'timestamp': '2025-10-01 04:16:56.314937', 'step': 956, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:56.367802', 'step': 956, 'epoch': 1} {'type': 'loss', 'content': 0.22568342089653015, 'timestamp': '2025-10-01 04:16:56.369773', 'step': 957, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:56.423012', 'step': 957, 'epoch': 1} {'type': 'loss', 'content': 0.16758131980895996, 'timestamp': '2025-10-01 04:16:56.425119', 'step': 958, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:56.477504', 'step': 958, 'epoch': 1} {'type': 'loss', 'content': 0.24050338566303253, 'timestamp': '2025-10-01 04:16:56.479568', 'step': 959, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:56.532603', 'step': 959, 'epoch': 1} {'type': 'loss', 'content': 0.17455752193927765, 'timestamp': '2025-10-01 04:16:56.538685', 'step': 960, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:56.590999', 'step': 960, 'epoch': 1} {'type': 'loss', 'content': 0.1699124574661255, 'timestamp': '2025-10-01 04:16:56.592998', 'step': 961, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:56.646163', 'step': 961, 'epoch': 1} {'type': 'loss', 'content': 0.18914687633514404, 'timestamp': '2025-10-01 04:16:56.648053', 'step': 962, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:56.701478', 'step': 962, 'epoch': 1} {'type': 'loss', 'content': 0.18978306651115417, 'timestamp': '2025-10-01 04:16:56.703523', 'step': 963, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:56.756289', 'step': 963, 'epoch': 1} {'type': 'loss', 'content': 0.26991206407546997, 'timestamp': '2025-10-01 04:16:56.761860', 'step': 964, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:16:56.814205', 'step': 964, 'epoch': 1} {'type': 'loss', 'content': 0.2072569578886032, 'timestamp': '2025-10-01 04:16:56.816122', 'step': 965, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:56.868675', 'step': 965, 'epoch': 1} {'type': 'loss', 'content': 0.24047839641571045, 'timestamp': '2025-10-01 04:16:56.877824', 'step': 966, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:16:56.930575', 'step': 966, 'epoch': 1} {'type': 'loss', 'content': 0.19997617602348328, 'timestamp': '2025-10-01 04:16:56.933831', 'step': 967, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:56.986652', 'step': 967, 'epoch': 1} {'type': 'loss', 'content': 0.2494017332792282, 'timestamp': '2025-10-01 04:16:56.992477', 'step': 968, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:57.044848', 'step': 968, 'epoch': 1} {'type': 'loss', 'content': 0.17351959645748138, 'timestamp': '2025-10-01 04:16:57.046910', 'step': 969, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:57.099357', 'step': 969, 'epoch': 1} {'type': 'loss', 'content': 0.26954248547554016, 'timestamp': '2025-10-01 04:16:57.101353', 'step': 970, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:57.154588', 'step': 970, 'epoch': 1} {'type': 'loss', 'content': 0.15686661005020142, 'timestamp': '2025-10-01 04:16:57.156598', 'step': 971, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:57.210174', 'step': 971, 'epoch': 1} {'type': 'loss', 'content': 0.2483425885438919, 'timestamp': '2025-10-01 04:16:57.215654', 'step': 972, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:57.268211', 'step': 972, 'epoch': 1} {'type': 'loss', 'content': 0.1765677034854889, 'timestamp': '2025-10-01 04:16:57.270253', 'step': 973, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:57.322859', 'step': 973, 'epoch': 1} {'type': 'loss', 'content': 0.1560303121805191, 'timestamp': '2025-10-01 04:16:57.324796', 'step': 974, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:57.377485', 'step': 974, 'epoch': 1} {'type': 'loss', 'content': 0.2298029065132141, 'timestamp': '2025-10-01 04:16:57.379337', 'step': 975, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:57.431903', 'step': 975, 'epoch': 1} {'type': 'loss', 'content': 0.1704358458518982, 'timestamp': '2025-10-01 04:16:57.437617', 'step': 976, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:57.489942', 'step': 976, 'epoch': 1} {'type': 'loss', 'content': 0.1793455183506012, 'timestamp': '2025-10-01 04:16:57.491843', 'step': 977, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:57.552163', 'step': 977, 'epoch': 1} {'type': 'loss', 'content': 0.2154807597398758, 'timestamp': '2025-10-01 04:16:57.554364', 'step': 978, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:57.607255', 'step': 978, 'epoch': 1} {'type': 'loss', 'content': 0.259470134973526, 'timestamp': '2025-10-01 04:16:57.609025', 'step': 979, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:57.667186', 'step': 979, 'epoch': 1} {'type': 'loss', 'content': 0.2015562355518341, 'timestamp': '2025-10-01 04:16:57.673695', 'step': 980, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:57.725767', 'step': 980, 'epoch': 1} {'type': 'loss', 'content': 0.1402294784784317, 'timestamp': '2025-10-01 04:16:57.727613', 'step': 981, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:57.779992', 'step': 981, 'epoch': 1} {'type': 'loss', 'content': 0.14841745793819427, 'timestamp': '2025-10-01 04:16:57.781816', 'step': 982, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:57.834617', 'step': 982, 'epoch': 1} {'type': 'loss', 'content': 0.13159207999706268, 'timestamp': '2025-10-01 04:16:57.836637', 'step': 983, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:57.888832', 'step': 983, 'epoch': 1} {'type': 'loss', 'content': 0.34627389907836914, 'timestamp': '2025-10-01 04:16:57.894228', 'step': 984, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:57.945743', 'step': 984, 'epoch': 1} {'type': 'loss', 'content': 0.17461895942687988, 'timestamp': '2025-10-01 04:16:57.948103', 'step': 985, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:58.000773', 'step': 985, 'epoch': 1} {'type': 'loss', 'content': 0.08661048859357834, 'timestamp': '2025-10-01 04:16:58.002658', 'step': 986, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:58.054759', 'step': 986, 'epoch': 1} {'type': 'loss', 'content': 0.15717417001724243, 'timestamp': '2025-10-01 04:16:58.056822', 'step': 987, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:58.109721', 'step': 987, 'epoch': 1} {'type': 'loss', 'content': 0.2927882969379425, 'timestamp': '2025-10-01 04:16:58.115614', 'step': 988, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:58.167439', 'step': 988, 'epoch': 1} {'type': 'loss', 'content': 0.2515356242656708, 'timestamp': '2025-10-01 04:16:58.169596', 'step': 989, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:58.221785', 'step': 989, 'epoch': 1} {'type': 'loss', 'content': 0.15933901071548462, 'timestamp': '2025-10-01 04:16:58.223824', 'step': 990, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:58.276127', 'step': 990, 'epoch': 1} {'type': 'loss', 'content': 0.12254934012889862, 'timestamp': '2025-10-01 04:16:58.278159', 'step': 991, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:58.331097', 'step': 991, 'epoch': 1} {'type': 'loss', 'content': 0.18692490458488464, 'timestamp': '2025-10-01 04:16:58.336697', 'step': 992, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:58.389008', 'step': 992, 'epoch': 1} {'type': 'loss', 'content': 0.18967726826667786, 'timestamp': '2025-10-01 04:16:58.391450', 'step': 993, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:58.443845', 'step': 993, 'epoch': 1} {'type': 'loss', 'content': 0.2374412566423416, 'timestamp': '2025-10-01 04:16:58.445783', 'step': 994, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:58.497967', 'step': 994, 'epoch': 1} {'type': 'loss', 'content': 0.16486459970474243, 'timestamp': '2025-10-01 04:16:58.499878', 'step': 995, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:58.551991', 'step': 995, 'epoch': 1} {'type': 'loss', 'content': 0.2124500274658203, 'timestamp': '2025-10-01 04:16:58.557600', 'step': 996, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:58.609720', 'step': 996, 'epoch': 1} {'type': 'loss', 'content': 0.17726756632328033, 'timestamp': '2025-10-01 04:16:58.611767', 'step': 997, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:58.664054', 'step': 997, 'epoch': 1} {'type': 'loss', 'content': 0.20203189551830292, 'timestamp': '2025-10-01 04:16:58.665943', 'step': 998, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:58.724780', 'step': 998, 'epoch': 1} {'type': 'loss', 'content': 0.1857849657535553, 'timestamp': '2025-10-01 04:16:58.726634', 'step': 999, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:58.779875', 'step': 999, 'epoch': 1} {'type': 'loss', 'content': 0.13214777410030365, 'timestamp': '2025-10-01 04:16:58.785340', 'step': 1000, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 1000', 'timestamp': '2025-10-01 04:16:59.172871', 'step': 1000, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:59.224862', 'step': 1000, 'epoch': 1} {'type': 'loss', 'content': 0.2081872522830963, 'timestamp': '2025-10-01 04:16:59.226892', 'step': 1001, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:59.286783', 'step': 1001, 'epoch': 1} {'type': 'loss', 'content': 0.21707889437675476, 'timestamp': '2025-10-01 04:16:59.289426', 'step': 1002, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:59.342772', 'step': 1002, 'epoch': 1} {'type': 'loss', 'content': 0.29670238494873047, 'timestamp': '2025-10-01 04:16:59.346181', 'step': 1003, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:59.405976', 'step': 1003, 'epoch': 1} {'type': 'loss', 'content': 0.18971610069274902, 'timestamp': '2025-10-01 04:16:59.411668', 'step': 1004, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:59.464882', 'step': 1004, 'epoch': 1} {'type': 'loss', 'content': 0.2020762711763382, 'timestamp': '2025-10-01 04:16:59.466902', 'step': 1005, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:59.528092', 'step': 1005, 'epoch': 1} {'type': 'loss', 'content': 0.18404129147529602, 'timestamp': '2025-10-01 04:16:59.530184', 'step': 1006, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:59.595011', 'step': 1006, 'epoch': 1} {'type': 'loss', 'content': 0.20929129421710968, 'timestamp': '2025-10-01 04:16:59.599233', 'step': 1007, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:16:59.653823', 'step': 1007, 'epoch': 1} {'type': 'loss', 'content': 0.15040422976016998, 'timestamp': '2025-10-01 04:16:59.659922', 'step': 1008, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:59.713870', 'step': 1008, 'epoch': 1} {'type': 'loss', 'content': 0.23125407099723816, 'timestamp': '2025-10-01 04:16:59.715886', 'step': 1009, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:16:59.774527', 'step': 1009, 'epoch': 1} {'type': 'loss', 'content': 0.12097246199846268, 'timestamp': '2025-10-01 04:16:59.781634', 'step': 1010, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:16:59.835882', 'step': 1010, 'epoch': 1} {'type': 'loss', 'content': 0.22012677788734436, 'timestamp': '2025-10-01 04:16:59.838260', 'step': 1011, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:16:59.892807', 'step': 1011, 'epoch': 1} {'type': 'loss', 'content': 0.1833105832338333, 'timestamp': '2025-10-01 04:16:59.908204', 'step': 1012, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:16:59.965144', 'step': 1012, 'epoch': 1} {'type': 'loss', 'content': 0.20857293903827667, 'timestamp': '2025-10-01 04:16:59.967475', 'step': 1013, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:00.026139', 'step': 1013, 'epoch': 1} {'type': 'loss', 'content': 0.20550952851772308, 'timestamp': '2025-10-01 04:17:00.028681', 'step': 1014, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:00.081989', 'step': 1014, 'epoch': 1} {'type': 'loss', 'content': 0.13571767508983612, 'timestamp': '2025-10-01 04:17:00.084545', 'step': 1015, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:00.147215', 'step': 1015, 'epoch': 1} {'type': 'loss', 'content': 0.30486980080604553, 'timestamp': '2025-10-01 04:17:00.153236', 'step': 1016, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:00.205607', 'step': 1016, 'epoch': 1} {'type': 'loss', 'content': 0.14973703026771545, 'timestamp': '2025-10-01 04:17:00.208006', 'step': 1017, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:00.262252', 'step': 1017, 'epoch': 1} {'type': 'loss', 'content': 0.12948685884475708, 'timestamp': '2025-10-01 04:17:00.264495', 'step': 1018, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:00.330598', 'step': 1018, 'epoch': 1} {'type': 'loss', 'content': 0.15282069146633148, 'timestamp': '2025-10-01 04:17:00.332595', 'step': 1019, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:00.392296', 'step': 1019, 'epoch': 1} {'type': 'loss', 'content': 0.20506834983825684, 'timestamp': '2025-10-01 04:17:00.397879', 'step': 1020, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:00.450465', 'step': 1020, 'epoch': 1} {'type': 'loss', 'content': 0.19207799434661865, 'timestamp': '2025-10-01 04:17:00.452841', 'step': 1021, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:00.508611', 'step': 1021, 'epoch': 1} {'type': 'loss', 'content': 0.2791666090488434, 'timestamp': '2025-10-01 04:17:00.510660', 'step': 1022, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:00.564060', 'step': 1022, 'epoch': 1} {'type': 'loss', 'content': 0.18875643610954285, 'timestamp': '2025-10-01 04:17:00.566557', 'step': 1023, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:00.621751', 'step': 1023, 'epoch': 1} {'type': 'loss', 'content': 0.17159514129161835, 'timestamp': '2025-10-01 04:17:00.627627', 'step': 1024, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:00.688420', 'step': 1024, 'epoch': 1} {'type': 'loss', 'content': 0.12857717275619507, 'timestamp': '2025-10-01 04:17:00.691069', 'step': 1025, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:00.762466', 'step': 1025, 'epoch': 1} {'type': 'loss', 'content': 0.22236302495002747, 'timestamp': '2025-10-01 04:17:00.764555', 'step': 1026, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:00.818657', 'step': 1026, 'epoch': 1} {'type': 'loss', 'content': 0.15283723175525665, 'timestamp': '2025-10-01 04:17:00.820793', 'step': 1027, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:00.873902', 'step': 1027, 'epoch': 1} {'type': 'loss', 'content': 0.2601125240325928, 'timestamp': '2025-10-01 04:17:00.879517', 'step': 1028, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:00.932393', 'step': 1028, 'epoch': 1} {'type': 'loss', 'content': 0.2539942264556885, 'timestamp': '2025-10-01 04:17:00.935161', 'step': 1029, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:17:00.988886', 'step': 1029, 'epoch': 1} {'type': 'loss', 'content': 0.16015037894248962, 'timestamp': '2025-10-01 04:17:00.990860', 'step': 1030, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:01.053814', 'step': 1030, 'epoch': 1} {'type': 'loss', 'content': 0.3139144480228424, 'timestamp': '2025-10-01 04:17:01.056506', 'step': 1031, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:01.111190', 'step': 1031, 'epoch': 1} {'type': 'loss', 'content': 0.21161465346813202, 'timestamp': '2025-10-01 04:17:01.116999', 'step': 1032, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:01.169320', 'step': 1032, 'epoch': 1} {'type': 'loss', 'content': 0.27687564492225647, 'timestamp': '2025-10-01 04:17:01.171499', 'step': 1033, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:01.228483', 'step': 1033, 'epoch': 1} {'type': 'loss', 'content': 0.14877232909202576, 'timestamp': '2025-10-01 04:17:01.230457', 'step': 1034, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:01.285472', 'step': 1034, 'epoch': 1} {'type': 'loss', 'content': 0.3095169961452484, 'timestamp': '2025-10-01 04:17:01.290477', 'step': 1035, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:01.348303', 'step': 1035, 'epoch': 1} {'type': 'loss', 'content': 0.25803861021995544, 'timestamp': '2025-10-01 04:17:01.353745', 'step': 1036, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:01.406982', 'step': 1036, 'epoch': 1} {'type': 'loss', 'content': 0.21753033995628357, 'timestamp': '2025-10-01 04:17:01.409275', 'step': 1037, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:01.471570', 'step': 1037, 'epoch': 1} {'type': 'loss', 'content': 0.16870073974132538, 'timestamp': '2025-10-01 04:17:01.473661', 'step': 1038, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:01.526304', 'step': 1038, 'epoch': 1} {'type': 'loss', 'content': 0.12352924793958664, 'timestamp': '2025-10-01 04:17:01.528038', 'step': 1039, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:17:01.580979', 'step': 1039, 'epoch': 1} {'type': 'loss', 'content': 0.20750176906585693, 'timestamp': '2025-10-01 04:17:01.596514', 'step': 1040, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:01.650003', 'step': 1040, 'epoch': 1} {'type': 'loss', 'content': 0.24696506559848785, 'timestamp': '2025-10-01 04:17:01.651937', 'step': 1041, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:01.704032', 'step': 1041, 'epoch': 1} {'type': 'loss', 'content': 0.2502462565898895, 'timestamp': '2025-10-01 04:17:01.706098', 'step': 1042, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:01.758931', 'step': 1042, 'epoch': 1} {'type': 'loss', 'content': 0.16008010506629944, 'timestamp': '2025-10-01 04:17:01.760956', 'step': 1043, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:01.813376', 'step': 1043, 'epoch': 1} {'type': 'loss', 'content': 0.22424419224262238, 'timestamp': '2025-10-01 04:17:01.818760', 'step': 1044, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:01.870647', 'step': 1044, 'epoch': 1} {'type': 'loss', 'content': 0.14381611347198486, 'timestamp': '2025-10-01 04:17:01.872872', 'step': 1045, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:01.928672', 'step': 1045, 'epoch': 1} {'type': 'loss', 'content': 0.15066710114479065, 'timestamp': '2025-10-01 04:17:01.930776', 'step': 1046, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:01.983894', 'step': 1046, 'epoch': 1} {'type': 'loss', 'content': 0.21045507490634918, 'timestamp': '2025-10-01 04:17:01.985837', 'step': 1047, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:02.052642', 'step': 1047, 'epoch': 1} {'type': 'loss', 'content': 0.235793799161911, 'timestamp': '2025-10-01 04:17:02.068845', 'step': 1048, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:02.121002', 'step': 1048, 'epoch': 1} {'type': 'loss', 'content': 0.2399352341890335, 'timestamp': '2025-10-01 04:17:02.123010', 'step': 1049, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:02.176146', 'step': 1049, 'epoch': 1} {'type': 'loss', 'content': 0.17613357305526733, 'timestamp': '2025-10-01 04:17:02.177854', 'step': 1050, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:02.232150', 'step': 1050, 'epoch': 1} {'type': 'loss', 'content': 0.12729091942310333, 'timestamp': '2025-10-01 04:17:02.234134', 'step': 1051, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:02.286011', 'step': 1051, 'epoch': 1} {'type': 'loss', 'content': 0.15627500414848328, 'timestamp': '2025-10-01 04:17:02.291640', 'step': 1052, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:02.357470', 'step': 1052, 'epoch': 1} {'type': 'loss', 'content': 0.18802644312381744, 'timestamp': '2025-10-01 04:17:02.359604', 'step': 1053, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:02.411715', 'step': 1053, 'epoch': 1} {'type': 'loss', 'content': 0.15585613250732422, 'timestamp': '2025-10-01 04:17:02.413766', 'step': 1054, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:02.467423', 'step': 1054, 'epoch': 1} {'type': 'loss', 'content': 0.1272479146718979, 'timestamp': '2025-10-01 04:17:02.470176', 'step': 1055, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:17:02.522592', 'step': 1055, 'epoch': 1} {'type': 'loss', 'content': 0.21838736534118652, 'timestamp': '2025-10-01 04:17:02.528036', 'step': 1056, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:02.581049', 'step': 1056, 'epoch': 1} {'type': 'loss', 'content': 0.2858371436595917, 'timestamp': '2025-10-01 04:17:02.589867', 'step': 1057, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:02.675999', 'step': 1057, 'epoch': 1} {'type': 'loss', 'content': 0.1751209795475006, 'timestamp': '2025-10-01 04:17:02.688066', 'step': 1058, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:02.742192', 'step': 1058, 'epoch': 1} {'type': 'loss', 'content': 0.2089967131614685, 'timestamp': '2025-10-01 04:17:02.744437', 'step': 1059, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:02.796842', 'step': 1059, 'epoch': 1} {'type': 'loss', 'content': 0.25812962651252747, 'timestamp': '2025-10-01 04:17:02.802582', 'step': 1060, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:02.855505', 'step': 1060, 'epoch': 1} {'type': 'loss', 'content': 0.3249090611934662, 'timestamp': '2025-10-01 04:17:02.859882', 'step': 1061, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:02.913160', 'step': 1061, 'epoch': 1} {'type': 'loss', 'content': 0.14739979803562164, 'timestamp': '2025-10-01 04:17:02.914900', 'step': 1062, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:02.967115', 'step': 1062, 'epoch': 1} {'type': 'loss', 'content': 0.20350243151187897, 'timestamp': '2025-10-01 04:17:02.969259', 'step': 1063, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:03.023222', 'step': 1063, 'epoch': 1} {'type': 'loss', 'content': 0.166414275765419, 'timestamp': '2025-10-01 04:17:03.028835', 'step': 1064, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:03.083832', 'step': 1064, 'epoch': 1} {'type': 'loss', 'content': 0.1990756243467331, 'timestamp': '2025-10-01 04:17:03.085722', 'step': 1065, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:03.138575', 'step': 1065, 'epoch': 1} {'type': 'loss', 'content': 0.2036755532026291, 'timestamp': '2025-10-01 04:17:03.140599', 'step': 1066, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:03.193439', 'step': 1066, 'epoch': 1} {'type': 'loss', 'content': 0.23569190502166748, 'timestamp': '2025-10-01 04:17:03.195438', 'step': 1067, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:03.248000', 'step': 1067, 'epoch': 1} {'type': 'loss', 'content': 0.17332856357097626, 'timestamp': '2025-10-01 04:17:03.253753', 'step': 1068, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:03.306186', 'step': 1068, 'epoch': 1} {'type': 'loss', 'content': 0.152461439371109, 'timestamp': '2025-10-01 04:17:03.308183', 'step': 1069, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:03.360487', 'step': 1069, 'epoch': 1} {'type': 'loss', 'content': 0.19604341685771942, 'timestamp': '2025-10-01 04:17:03.362528', 'step': 1070, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:03.415384', 'step': 1070, 'epoch': 1} {'type': 'loss', 'content': 0.21282686293125153, 'timestamp': '2025-10-01 04:17:03.418136', 'step': 1071, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:03.473905', 'step': 1071, 'epoch': 1} {'type': 'loss', 'content': 0.20317628979682922, 'timestamp': '2025-10-01 04:17:03.479857', 'step': 1072, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:03.541303', 'step': 1072, 'epoch': 1} {'type': 'loss', 'content': 0.1872943639755249, 'timestamp': '2025-10-01 04:17:03.543521', 'step': 1073, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:03.595969', 'step': 1073, 'epoch': 1} {'type': 'loss', 'content': 0.19772708415985107, 'timestamp': '2025-10-01 04:17:03.599961', 'step': 1074, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:03.652123', 'step': 1074, 'epoch': 1} {'type': 'loss', 'content': 0.17175978422164917, 'timestamp': '2025-10-01 04:17:03.654595', 'step': 1075, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:03.707542', 'step': 1075, 'epoch': 1} {'type': 'loss', 'content': 0.12783761322498322, 'timestamp': '2025-10-01 04:17:03.713274', 'step': 1076, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:03.765190', 'step': 1076, 'epoch': 1} {'type': 'loss', 'content': 0.243284672498703, 'timestamp': '2025-10-01 04:17:03.767359', 'step': 1077, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:03.820406', 'step': 1077, 'epoch': 1} {'type': 'loss', 'content': 0.17852362990379333, 'timestamp': '2025-10-01 04:17:03.822352', 'step': 1078, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:03.875184', 'step': 1078, 'epoch': 1} {'type': 'loss', 'content': 0.12666204571723938, 'timestamp': '2025-10-01 04:17:03.877139', 'step': 1079, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:03.929240', 'step': 1079, 'epoch': 1} {'type': 'loss', 'content': 0.2584574222564697, 'timestamp': '2025-10-01 04:17:03.934957', 'step': 1080, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:03.986748', 'step': 1080, 'epoch': 1} {'type': 'loss', 'content': 0.147058367729187, 'timestamp': '2025-10-01 04:17:03.989573', 'step': 1081, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:04.041889', 'step': 1081, 'epoch': 1} {'type': 'loss', 'content': 0.13576218485832214, 'timestamp': '2025-10-01 04:17:04.043925', 'step': 1082, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:04.096629', 'step': 1082, 'epoch': 1} {'type': 'loss', 'content': 0.27316421270370483, 'timestamp': '2025-10-01 04:17:04.098372', 'step': 1083, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:04.150258', 'step': 1083, 'epoch': 1} {'type': 'loss', 'content': 0.09915591776371002, 'timestamp': '2025-10-01 04:17:04.155760', 'step': 1084, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:04.207717', 'step': 1084, 'epoch': 1} {'type': 'loss', 'content': 0.2321685552597046, 'timestamp': '2025-10-01 04:17:04.209463', 'step': 1085, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:04.262267', 'step': 1085, 'epoch': 1} {'type': 'loss', 'content': 0.18887893855571747, 'timestamp': '2025-10-01 04:17:04.264233', 'step': 1086, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:04.316929', 'step': 1086, 'epoch': 1} {'type': 'loss', 'content': 0.1847182959318161, 'timestamp': '2025-10-01 04:17:04.325941', 'step': 1087, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:04.378091', 'step': 1087, 'epoch': 1} {'type': 'loss', 'content': 0.23845553398132324, 'timestamp': '2025-10-01 04:17:04.383813', 'step': 1088, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:17:04.435599', 'step': 1088, 'epoch': 1} {'type': 'loss', 'content': 0.11800743639469147, 'timestamp': '2025-10-01 04:17:04.437776', 'step': 1089, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:04.494806', 'step': 1089, 'epoch': 1} {'type': 'loss', 'content': 0.151970773935318, 'timestamp': '2025-10-01 04:17:04.497106', 'step': 1090, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:04.553664', 'step': 1090, 'epoch': 1} {'type': 'loss', 'content': 0.12614303827285767, 'timestamp': '2025-10-01 04:17:04.555497', 'step': 1091, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:17:04.610410', 'step': 1091, 'epoch': 1} {'type': 'loss', 'content': 0.16057881712913513, 'timestamp': '2025-10-01 04:17:04.616077', 'step': 1092, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:04.669342', 'step': 1092, 'epoch': 1} {'type': 'loss', 'content': 0.17512035369873047, 'timestamp': '2025-10-01 04:17:04.671459', 'step': 1093, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:04.724411', 'step': 1093, 'epoch': 1} {'type': 'loss', 'content': 0.14339430630207062, 'timestamp': '2025-10-01 04:17:04.726465', 'step': 1094, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:04.779459', 'step': 1094, 'epoch': 1} {'type': 'loss', 'content': 0.1965073049068451, 'timestamp': '2025-10-01 04:17:04.781535', 'step': 1095, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:04.836166', 'step': 1095, 'epoch': 1} {'type': 'loss', 'content': 0.16808375716209412, 'timestamp': '2025-10-01 04:17:04.841858', 'step': 1096, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:17:04.894002', 'step': 1096, 'epoch': 1} {'type': 'loss', 'content': 0.23152770102024078, 'timestamp': '2025-10-01 04:17:04.895773', 'step': 1097, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:04.948824', 'step': 1097, 'epoch': 1} {'type': 'loss', 'content': 0.21206916868686676, 'timestamp': '2025-10-01 04:17:04.950835', 'step': 1098, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:05.003321', 'step': 1098, 'epoch': 1} {'type': 'loss', 'content': 0.17740681767463684, 'timestamp': '2025-10-01 04:17:05.005338', 'step': 1099, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:05.057893', 'step': 1099, 'epoch': 1} {'type': 'loss', 'content': 0.14996039867401123, 'timestamp': '2025-10-01 04:17:05.063282', 'step': 1100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:05.116001', 'step': 1100, 'epoch': 1} {'type': 'loss', 'content': 0.12472261488437653, 'timestamp': '2025-10-01 04:17:05.117798', 'step': 1101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:05.169693', 'step': 1101, 'epoch': 1} {'type': 'loss', 'content': 0.1462525874376297, 'timestamp': '2025-10-01 04:17:05.179785', 'step': 1102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:17:05.233256', 'step': 1102, 'epoch': 1} {'type': 'loss', 'content': 0.19456978142261505, 'timestamp': '2025-10-01 04:17:05.236108', 'step': 1103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:05.289606', 'step': 1103, 'epoch': 1} {'type': 'loss', 'content': 0.13580796122550964, 'timestamp': '2025-10-01 04:17:05.295287', 'step': 1104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:05.346879', 'step': 1104, 'epoch': 1} {'type': 'loss', 'content': 0.2577155828475952, 'timestamp': '2025-10-01 04:17:05.348967', 'step': 1105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:05.401146', 'step': 1105, 'epoch': 1} {'type': 'loss', 'content': 0.16985547542572021, 'timestamp': '2025-10-01 04:17:05.403208', 'step': 1106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:05.458994', 'step': 1106, 'epoch': 1} {'type': 'loss', 'content': 0.21013778448104858, 'timestamp': '2025-10-01 04:17:05.461086', 'step': 1107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:05.518725', 'step': 1107, 'epoch': 1} {'type': 'loss', 'content': 0.23567107319831848, 'timestamp': '2025-10-01 04:17:05.524266', 'step': 1108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:05.576984', 'step': 1108, 'epoch': 1} {'type': 'loss', 'content': 0.1626671552658081, 'timestamp': '2025-10-01 04:17:05.578861', 'step': 1109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:05.631501', 'step': 1109, 'epoch': 1} {'type': 'loss', 'content': 0.16680434346199036, 'timestamp': '2025-10-01 04:17:05.633521', 'step': 1110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:17:05.688965', 'step': 1110, 'epoch': 1} {'type': 'loss', 'content': 0.15834148228168488, 'timestamp': '2025-10-01 04:17:05.691036', 'step': 1111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:05.743188', 'step': 1111, 'epoch': 1} {'type': 'loss', 'content': 0.23490144312381744, 'timestamp': '2025-10-01 04:17:05.748808', 'step': 1112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:05.800658', 'step': 1112, 'epoch': 1} {'type': 'loss', 'content': 0.1988910436630249, 'timestamp': '2025-10-01 04:17:05.802538', 'step': 1113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:05.855098', 'step': 1113, 'epoch': 1} {'type': 'loss', 'content': 0.157816544175148, 'timestamp': '2025-10-01 04:17:05.857245', 'step': 1114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:17:05.910398', 'step': 1114, 'epoch': 1} {'type': 'loss', 'content': 0.1326979100704193, 'timestamp': '2025-10-01 04:17:05.912330', 'step': 1115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:05.964995', 'step': 1115, 'epoch': 1} {'type': 'loss', 'content': 0.15648770332336426, 'timestamp': '2025-10-01 04:17:05.970437', 'step': 1116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:06.022496', 'step': 1116, 'epoch': 1} {'type': 'loss', 'content': 0.19812358915805817, 'timestamp': '2025-10-01 04:17:06.024506', 'step': 1117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:06.078158', 'step': 1117, 'epoch': 1} {'type': 'loss', 'content': 0.22945213317871094, 'timestamp': '2025-10-01 04:17:06.081600', 'step': 1118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:06.134192', 'step': 1118, 'epoch': 1} {'type': 'loss', 'content': 0.1957722306251526, 'timestamp': '2025-10-01 04:17:06.135983', 'step': 1119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:06.188001', 'step': 1119, 'epoch': 1} {'type': 'loss', 'content': 0.17570596933364868, 'timestamp': '2025-10-01 04:17:06.193404', 'step': 1120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:06.245005', 'step': 1120, 'epoch': 1} {'type': 'loss', 'content': 0.24203163385391235, 'timestamp': '2025-10-01 04:17:06.247467', 'step': 1121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:06.308710', 'step': 1121, 'epoch': 1} {'type': 'loss', 'content': 0.21496786177158356, 'timestamp': '2025-10-01 04:17:06.310515', 'step': 1122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:06.363706', 'step': 1122, 'epoch': 1} {'type': 'loss', 'content': 0.16473904252052307, 'timestamp': '2025-10-01 04:17:06.365299', 'step': 1123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:06.417997', 'step': 1123, 'epoch': 1} {'type': 'loss', 'content': 0.2010391354560852, 'timestamp': '2025-10-01 04:17:06.423425', 'step': 1124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:06.474972', 'step': 1124, 'epoch': 1} {'type': 'loss', 'content': 0.16202957928180695, 'timestamp': '2025-10-01 04:17:06.476997', 'step': 1125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:06.529716', 'step': 1125, 'epoch': 1} {'type': 'loss', 'content': 0.1374589502811432, 'timestamp': '2025-10-01 04:17:06.532274', 'step': 1126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:17:06.584666', 'step': 1126, 'epoch': 1} {'type': 'loss', 'content': 0.21999196708202362, 'timestamp': '2025-10-01 04:17:06.587233', 'step': 1127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:06.641482', 'step': 1127, 'epoch': 1} {'type': 'loss', 'content': 0.22336676716804504, 'timestamp': '2025-10-01 04:17:06.647078', 'step': 1128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:06.700571', 'step': 1128, 'epoch': 1} {'type': 'loss', 'content': 0.12602075934410095, 'timestamp': '2025-10-01 04:17:06.702330', 'step': 1129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:06.755594', 'step': 1129, 'epoch': 1} {'type': 'loss', 'content': 0.19800511002540588, 'timestamp': '2025-10-01 04:17:06.758087', 'step': 1130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:17:06.810784', 'step': 1130, 'epoch': 1} {'type': 'loss', 'content': 0.2627311050891876, 'timestamp': '2025-10-01 04:17:06.812817', 'step': 1131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:06.865556', 'step': 1131, 'epoch': 1} {'type': 'loss', 'content': 0.14422789216041565, 'timestamp': '2025-10-01 04:17:06.871181', 'step': 1132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:06.925880', 'step': 1132, 'epoch': 1} {'type': 'loss', 'content': 0.25997495651245117, 'timestamp': '2025-10-01 04:17:06.927852', 'step': 1133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:06.980592', 'step': 1133, 'epoch': 1} {'type': 'loss', 'content': 0.15618160367012024, 'timestamp': '2025-10-01 04:17:06.983021', 'step': 1134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:17:07.038111', 'step': 1134, 'epoch': 1} {'type': 'loss', 'content': 0.35626083612442017, 'timestamp': '2025-10-01 04:17:07.042608', 'step': 1135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:07.095313', 'step': 1135, 'epoch': 1} {'type': 'loss', 'content': 0.2723628282546997, 'timestamp': '2025-10-01 04:17:07.100584', 'step': 1136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:07.152819', 'step': 1136, 'epoch': 1} {'type': 'loss', 'content': 0.18046006560325623, 'timestamp': '2025-10-01 04:17:07.154813', 'step': 1137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:07.207807', 'step': 1137, 'epoch': 1} {'type': 'loss', 'content': 0.17809966206550598, 'timestamp': '2025-10-01 04:17:07.209786', 'step': 1138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:17:07.262204', 'step': 1138, 'epoch': 1} {'type': 'loss', 'content': 0.18197591602802277, 'timestamp': '2025-10-01 04:17:07.264321', 'step': 1139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:07.317246', 'step': 1139, 'epoch': 1} {'type': 'loss', 'content': 0.10208159685134888, 'timestamp': '2025-10-01 04:17:07.322666', 'step': 1140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:07.374864', 'step': 1140, 'epoch': 1} {'type': 'loss', 'content': 0.1805703490972519, 'timestamp': '2025-10-01 04:17:07.376901', 'step': 1141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:07.429636', 'step': 1141, 'epoch': 1} {'type': 'loss', 'content': 0.24326850473880768, 'timestamp': '2025-10-01 04:17:07.431681', 'step': 1142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:17:07.484393', 'step': 1142, 'epoch': 1} {'type': 'loss', 'content': 0.11022596061229706, 'timestamp': '2025-10-01 04:17:07.486584', 'step': 1143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:07.539430', 'step': 1143, 'epoch': 1} {'type': 'loss', 'content': 0.22131575644016266, 'timestamp': '2025-10-01 04:17:07.545350', 'step': 1144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:17:07.597781', 'step': 1144, 'epoch': 1} {'type': 'loss', 'content': 0.17694099247455597, 'timestamp': '2025-10-01 04:17:07.599707', 'step': 1145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:07.651520', 'step': 1145, 'epoch': 1} {'type': 'loss', 'content': 0.12615074217319489, 'timestamp': '2025-10-01 04:17:07.653898', 'step': 1146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:07.705842', 'step': 1146, 'epoch': 1} {'type': 'loss', 'content': 0.20087361335754395, 'timestamp': '2025-10-01 04:17:07.707547', 'step': 1147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:07.760017', 'step': 1147, 'epoch': 1} {'type': 'loss', 'content': 0.2822842001914978, 'timestamp': '2025-10-01 04:17:07.766137', 'step': 1148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:07.818746', 'step': 1148, 'epoch': 1} {'type': 'loss', 'content': 0.2704969644546509, 'timestamp': '2025-10-01 04:17:07.821121', 'step': 1149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:07.873504', 'step': 1149, 'epoch': 1} {'type': 'loss', 'content': 0.25330695509910583, 'timestamp': '2025-10-01 04:17:07.875937', 'step': 1150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:07.929525', 'step': 1150, 'epoch': 1} {'type': 'loss', 'content': 0.11257369071245193, 'timestamp': '2025-10-01 04:17:07.931938', 'step': 1151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:07.984304', 'step': 1151, 'epoch': 1} {'type': 'loss', 'content': 0.19472798705101013, 'timestamp': '2025-10-01 04:17:07.989819', 'step': 1152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:08.045854', 'step': 1152, 'epoch': 1} {'type': 'loss', 'content': 0.1558196246623993, 'timestamp': '2025-10-01 04:17:08.047896', 'step': 1153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:08.099858', 'step': 1153, 'epoch': 1} {'type': 'loss', 'content': 0.14194022119045258, 'timestamp': '2025-10-01 04:17:08.101836', 'step': 1154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:08.154111', 'step': 1154, 'epoch': 1} {'type': 'loss', 'content': 0.17320075631141663, 'timestamp': '2025-10-01 04:17:08.158895', 'step': 1155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-01 04:17:08.211431', 'step': 1155, 'epoch': 1} {'type': 'loss', 'content': 0.21736657619476318, 'timestamp': '2025-10-01 04:17:08.217983', 'step': 1156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:08.270562', 'step': 1156, 'epoch': 1} {'type': 'loss', 'content': 0.22040531039237976, 'timestamp': '2025-10-01 04:17:08.272598', 'step': 1157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:08.326162', 'step': 1157, 'epoch': 1} {'type': 'loss', 'content': 0.21662352979183197, 'timestamp': '2025-10-01 04:17:08.328992', 'step': 1158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:08.386748', 'step': 1158, 'epoch': 1} {'type': 'loss', 'content': 0.17383980751037598, 'timestamp': '2025-10-01 04:17:08.395385', 'step': 1159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:08.455734', 'step': 1159, 'epoch': 1} {'type': 'loss', 'content': 0.16327549517154694, 'timestamp': '2025-10-01 04:17:08.461846', 'step': 1160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:08.515220', 'step': 1160, 'epoch': 1} {'type': 'loss', 'content': 0.12805010378360748, 'timestamp': '2025-10-01 04:17:08.517784', 'step': 1161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:08.570896', 'step': 1161, 'epoch': 1} {'type': 'loss', 'content': 0.25521132349967957, 'timestamp': '2025-10-01 04:17:08.573799', 'step': 1162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:08.628376', 'step': 1162, 'epoch': 1} {'type': 'loss', 'content': 0.20175981521606445, 'timestamp': '2025-10-01 04:17:08.635663', 'step': 1163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:08.689490', 'step': 1163, 'epoch': 1} {'type': 'loss', 'content': 0.12071860581636429, 'timestamp': '2025-10-01 04:17:08.695422', 'step': 1164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:08.748811', 'step': 1164, 'epoch': 1} {'type': 'loss', 'content': 0.15015920996665955, 'timestamp': '2025-10-01 04:17:08.751338', 'step': 1165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:08.804802', 'step': 1165, 'epoch': 1} {'type': 'loss', 'content': 0.15099374949932098, 'timestamp': '2025-10-01 04:17:08.807005', 'step': 1166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:08.860729', 'step': 1166, 'epoch': 1} {'type': 'loss', 'content': 0.17549888789653778, 'timestamp': '2025-10-01 04:17:08.862825', 'step': 1167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:08.919722', 'step': 1167, 'epoch': 1} {'type': 'loss', 'content': 0.1476045399904251, 'timestamp': '2025-10-01 04:17:08.925738', 'step': 1168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:08.987544', 'step': 1168, 'epoch': 1} {'type': 'loss', 'content': 0.19088208675384521, 'timestamp': '2025-10-01 04:17:08.989953', 'step': 1169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:09.048988', 'step': 1169, 'epoch': 1} {'type': 'loss', 'content': 0.1915903091430664, 'timestamp': '2025-10-01 04:17:09.051216', 'step': 1170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:09.105819', 'step': 1170, 'epoch': 1} {'type': 'loss', 'content': 0.1586872935295105, 'timestamp': '2025-10-01 04:17:09.107780', 'step': 1171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:09.162190', 'step': 1171, 'epoch': 1} {'type': 'loss', 'content': 0.1846253126859665, 'timestamp': '2025-10-01 04:17:09.168136', 'step': 1172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:09.222105', 'step': 1172, 'epoch': 1} {'type': 'loss', 'content': 0.16660712659358978, 'timestamp': '2025-10-01 04:17:09.225212', 'step': 1173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:09.278126', 'step': 1173, 'epoch': 1} {'type': 'loss', 'content': 0.18210040032863617, 'timestamp': '2025-10-01 04:17:09.280329', 'step': 1174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:09.334840', 'step': 1174, 'epoch': 1} {'type': 'loss', 'content': 0.15724694728851318, 'timestamp': '2025-10-01 04:17:09.337196', 'step': 1175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:09.394919', 'step': 1175, 'epoch': 1} {'type': 'loss', 'content': 0.22792865335941315, 'timestamp': '2025-10-01 04:17:09.400621', 'step': 1176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:09.454129', 'step': 1176, 'epoch': 1} {'type': 'loss', 'content': 0.1750900000333786, 'timestamp': '2025-10-01 04:17:09.456506', 'step': 1177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:09.509807', 'step': 1177, 'epoch': 1} {'type': 'loss', 'content': 0.2647790014743805, 'timestamp': '2025-10-01 04:17:09.512044', 'step': 1178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:09.565399', 'step': 1178, 'epoch': 1} {'type': 'loss', 'content': 0.1355723887681961, 'timestamp': '2025-10-01 04:17:09.567793', 'step': 1179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:09.622534', 'step': 1179, 'epoch': 1} {'type': 'loss', 'content': 0.20970776677131653, 'timestamp': '2025-10-01 04:17:09.630766', 'step': 1180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:09.684335', 'step': 1180, 'epoch': 1} {'type': 'loss', 'content': 0.20154240727424622, 'timestamp': '2025-10-01 04:17:09.686984', 'step': 1181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:09.744219', 'step': 1181, 'epoch': 1} {'type': 'loss', 'content': 0.1776326447725296, 'timestamp': '2025-10-01 04:17:09.748499', 'step': 1182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:09.811809', 'step': 1182, 'epoch': 1} {'type': 'loss', 'content': 0.1596323400735855, 'timestamp': '2025-10-01 04:17:09.822583', 'step': 1183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:09.878294', 'step': 1183, 'epoch': 1} {'type': 'loss', 'content': 0.12273665517568588, 'timestamp': '2025-10-01 04:17:09.884392', 'step': 1184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:09.942284', 'step': 1184, 'epoch': 1} {'type': 'loss', 'content': 0.13822481036186218, 'timestamp': '2025-10-01 04:17:09.944256', 'step': 1185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:09.996625', 'step': 1185, 'epoch': 1} {'type': 'loss', 'content': 0.18321408331394196, 'timestamp': '2025-10-01 04:17:09.998664', 'step': 1186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:10.053169', 'step': 1186, 'epoch': 1} {'type': 'loss', 'content': 0.25424137711524963, 'timestamp': '2025-10-01 04:17:10.055462', 'step': 1187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:10.108328', 'step': 1187, 'epoch': 1} {'type': 'loss', 'content': 0.1978016346693039, 'timestamp': '2025-10-01 04:17:10.114841', 'step': 1188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:10.168792', 'step': 1188, 'epoch': 1} {'type': 'loss', 'content': 0.14070026576519012, 'timestamp': '2025-10-01 04:17:10.181592', 'step': 1189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:10.235025', 'step': 1189, 'epoch': 1} {'type': 'loss', 'content': 0.160756915807724, 'timestamp': '2025-10-01 04:17:10.237224', 'step': 1190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:10.291624', 'step': 1190, 'epoch': 1} {'type': 'loss', 'content': 0.20191694796085358, 'timestamp': '2025-10-01 04:17:10.294054', 'step': 1191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:10.348526', 'step': 1191, 'epoch': 1} {'type': 'loss', 'content': 0.09735213220119476, 'timestamp': '2025-10-01 04:17:10.354486', 'step': 1192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:10.407275', 'step': 1192, 'epoch': 1} {'type': 'loss', 'content': 0.16290117800235748, 'timestamp': '2025-10-01 04:17:10.409250', 'step': 1193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:10.462285', 'step': 1193, 'epoch': 1} {'type': 'loss', 'content': 0.23546932637691498, 'timestamp': '2025-10-01 04:17:10.464362', 'step': 1194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:10.525869', 'step': 1194, 'epoch': 1} {'type': 'loss', 'content': 0.19602462649345398, 'timestamp': '2025-10-01 04:17:10.528144', 'step': 1195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:10.582237', 'step': 1195, 'epoch': 1} {'type': 'loss', 'content': 0.18944710493087769, 'timestamp': '2025-10-01 04:17:10.588528', 'step': 1196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:10.649613', 'step': 1196, 'epoch': 1} {'type': 'loss', 'content': 0.25105875730514526, 'timestamp': '2025-10-01 04:17:10.651686', 'step': 1197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:10.705579', 'step': 1197, 'epoch': 1} {'type': 'loss', 'content': 0.1424192637205124, 'timestamp': '2025-10-01 04:17:10.707609', 'step': 1198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:10.769328', 'step': 1198, 'epoch': 1} {'type': 'loss', 'content': 0.16882149875164032, 'timestamp': '2025-10-01 04:17:10.771594', 'step': 1199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:10.825288', 'step': 1199, 'epoch': 1} {'type': 'loss', 'content': 0.1237870305776596, 'timestamp': '2025-10-01 04:17:10.831267', 'step': 1200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:10.883914', 'step': 1200, 'epoch': 1} {'type': 'loss', 'content': 0.15752027928829193, 'timestamp': '2025-10-01 04:17:10.886933', 'step': 1201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:10.946911', 'step': 1201, 'epoch': 1} {'type': 'loss', 'content': 0.17846478521823883, 'timestamp': '2025-10-01 04:17:10.949486', 'step': 1202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:11.002408', 'step': 1202, 'epoch': 1} {'type': 'loss', 'content': 0.19616562128067017, 'timestamp': '2025-10-01 04:17:11.004538', 'step': 1203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:11.057815', 'step': 1203, 'epoch': 1} {'type': 'loss', 'content': 0.19756878912448883, 'timestamp': '2025-10-01 04:17:11.063850', 'step': 1204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:11.117710', 'step': 1204, 'epoch': 1} {'type': 'loss', 'content': 0.15725238621234894, 'timestamp': '2025-10-01 04:17:11.120046', 'step': 1205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:17:11.182655', 'step': 1205, 'epoch': 1} {'type': 'loss', 'content': 0.23001188039779663, 'timestamp': '2025-10-01 04:17:11.184781', 'step': 1206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:11.238831', 'step': 1206, 'epoch': 1} {'type': 'loss', 'content': 0.17839770019054413, 'timestamp': '2025-10-01 04:17:11.240939', 'step': 1207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:11.305196', 'step': 1207, 'epoch': 1} {'type': 'loss', 'content': 0.1830008178949356, 'timestamp': '2025-10-01 04:17:11.311406', 'step': 1208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:11.365837', 'step': 1208, 'epoch': 1} {'type': 'loss', 'content': 0.10442887991666794, 'timestamp': '2025-10-01 04:17:11.367787', 'step': 1209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:11.429105', 'step': 1209, 'epoch': 1} {'type': 'loss', 'content': 0.22235539555549622, 'timestamp': '2025-10-01 04:17:11.431426', 'step': 1210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:11.485869', 'step': 1210, 'epoch': 1} {'type': 'loss', 'content': 0.1549159288406372, 'timestamp': '2025-10-01 04:17:11.488343', 'step': 1211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-01 04:17:11.544267', 'step': 1211, 'epoch': 1} {'type': 'loss', 'content': 0.20159967243671417, 'timestamp': '2025-10-01 04:17:11.558418', 'step': 1212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:11.611486', 'step': 1212, 'epoch': 1} {'type': 'loss', 'content': 0.15298910439014435, 'timestamp': '2025-10-01 04:17:11.613276', 'step': 1213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:11.671824', 'step': 1213, 'epoch': 1} {'type': 'loss', 'content': 0.13700614869594574, 'timestamp': '2025-10-01 04:17:11.674799', 'step': 1214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:11.733391', 'step': 1214, 'epoch': 1} {'type': 'loss', 'content': 0.15543845295906067, 'timestamp': '2025-10-01 04:17:11.735762', 'step': 1215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:11.795307', 'step': 1215, 'epoch': 1} {'type': 'loss', 'content': 0.13651202619075775, 'timestamp': '2025-10-01 04:17:11.801745', 'step': 1216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:11.862301', 'step': 1216, 'epoch': 1} {'type': 'loss', 'content': 0.17665548622608185, 'timestamp': '2025-10-01 04:17:11.865861', 'step': 1217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:11.929330', 'step': 1217, 'epoch': 1} {'type': 'loss', 'content': 0.1416267603635788, 'timestamp': '2025-10-01 04:17:11.934257', 'step': 1218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:11.993785', 'step': 1218, 'epoch': 1} {'type': 'loss', 'content': 0.2035306692123413, 'timestamp': '2025-10-01 04:17:11.996905', 'step': 1219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:12.056235', 'step': 1219, 'epoch': 1} {'type': 'loss', 'content': 0.15231581032276154, 'timestamp': '2025-10-01 04:17:12.063212', 'step': 1220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:12.121757', 'step': 1220, 'epoch': 1} {'type': 'loss', 'content': 0.19236037135124207, 'timestamp': '2025-10-01 04:17:12.123614', 'step': 1221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:12.185002', 'step': 1221, 'epoch': 1} {'type': 'loss', 'content': 0.17895947396755219, 'timestamp': '2025-10-01 04:17:12.187352', 'step': 1222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:12.247525', 'step': 1222, 'epoch': 1} {'type': 'loss', 'content': 0.18232952058315277, 'timestamp': '2025-10-01 04:17:12.250360', 'step': 1223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:12.311399', 'step': 1223, 'epoch': 1} {'type': 'loss', 'content': 0.15308862924575806, 'timestamp': '2025-10-01 04:17:12.331900', 'step': 1224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:12.412748', 'step': 1224, 'epoch': 1} {'type': 'loss', 'content': 0.26890671253204346, 'timestamp': '2025-10-01 04:17:12.417042', 'step': 1225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:12.476424', 'step': 1225, 'epoch': 1} {'type': 'loss', 'content': 0.30814775824546814, 'timestamp': '2025-10-01 04:17:12.480411', 'step': 1226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:12.541242', 'step': 1226, 'epoch': 1} {'type': 'loss', 'content': 0.24058660864830017, 'timestamp': '2025-10-01 04:17:12.544693', 'step': 1227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:12.604787', 'step': 1227, 'epoch': 1} {'type': 'loss', 'content': 0.1333935409784317, 'timestamp': '2025-10-01 04:17:12.611580', 'step': 1228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:12.670766', 'step': 1228, 'epoch': 1} {'type': 'loss', 'content': 0.2001902163028717, 'timestamp': '2025-10-01 04:17:12.673063', 'step': 1229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:12.731347', 'step': 1229, 'epoch': 1} {'type': 'loss', 'content': 0.3082253634929657, 'timestamp': '2025-10-01 04:17:12.733715', 'step': 1230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:12.793235', 'step': 1230, 'epoch': 1} {'type': 'loss', 'content': 0.23794454336166382, 'timestamp': '2025-10-01 04:17:12.799708', 'step': 1231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:12.864777', 'step': 1231, 'epoch': 1} {'type': 'loss', 'content': 0.19178622961044312, 'timestamp': '2025-10-01 04:17:12.872431', 'step': 1232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:12.937174', 'step': 1232, 'epoch': 1} {'type': 'loss', 'content': 0.1755755990743637, 'timestamp': '2025-10-01 04:17:12.939237', 'step': 1233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:12.997308', 'step': 1233, 'epoch': 1} {'type': 'loss', 'content': 0.16714514791965485, 'timestamp': '2025-10-01 04:17:12.999938', 'step': 1234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:13.060875', 'step': 1234, 'epoch': 1} {'type': 'loss', 'content': 0.133916437625885, 'timestamp': '2025-10-01 04:17:13.063154', 'step': 1235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:13.125042', 'step': 1235, 'epoch': 1} {'type': 'loss', 'content': 0.1383403092622757, 'timestamp': '2025-10-01 04:17:13.131614', 'step': 1236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:13.189314', 'step': 1236, 'epoch': 1} {'type': 'loss', 'content': 0.2221647948026657, 'timestamp': '2025-10-01 04:17:13.191385', 'step': 1237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-01 04:17:13.272366', 'step': 1237, 'epoch': 1} {'type': 'loss', 'content': 0.41108575463294983, 'timestamp': '2025-10-01 04:17:13.274984', 'step': 1238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:13.335626', 'step': 1238, 'epoch': 1} {'type': 'loss', 'content': 0.14845514297485352, 'timestamp': '2025-10-01 04:17:13.344502', 'step': 1239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:13.406462', 'step': 1239, 'epoch': 1} {'type': 'loss', 'content': 0.22710026800632477, 'timestamp': '2025-10-01 04:17:13.420523', 'step': 1240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:13.479700', 'step': 1240, 'epoch': 1} {'type': 'loss', 'content': 0.3030742406845093, 'timestamp': '2025-10-01 04:17:13.481997', 'step': 1241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:13.541989', 'step': 1241, 'epoch': 1} {'type': 'loss', 'content': 0.1791863888502121, 'timestamp': '2025-10-01 04:17:13.547847', 'step': 1242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:13.604495', 'step': 1242, 'epoch': 1} {'type': 'loss', 'content': 0.17766910791397095, 'timestamp': '2025-10-01 04:17:13.606675', 'step': 1243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:13.663275', 'step': 1243, 'epoch': 1} {'type': 'loss', 'content': 0.1442151516675949, 'timestamp': '2025-10-01 04:17:13.669558', 'step': 1244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:13.722917', 'step': 1244, 'epoch': 1} {'type': 'loss', 'content': 0.21806396543979645, 'timestamp': '2025-10-01 04:17:13.726769', 'step': 1245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:13.780122', 'step': 1245, 'epoch': 1} {'type': 'loss', 'content': 0.16848653554916382, 'timestamp': '2025-10-01 04:17:13.782274', 'step': 1246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:13.835716', 'step': 1246, 'epoch': 1} {'type': 'loss', 'content': 0.27208495140075684, 'timestamp': '2025-10-01 04:17:13.843957', 'step': 1247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:13.918595', 'step': 1247, 'epoch': 1} {'type': 'loss', 'content': 0.09935970604419708, 'timestamp': '2025-10-01 04:17:13.928984', 'step': 1248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:14.035136', 'step': 1248, 'epoch': 1} {'type': 'loss', 'content': 0.18595464527606964, 'timestamp': '2025-10-01 04:17:14.050288', 'step': 1249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:14.126284', 'step': 1249, 'epoch': 1} {'type': 'loss', 'content': 0.12737272679805756, 'timestamp': '2025-10-01 04:17:14.136336', 'step': 1250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:14.216910', 'step': 1250, 'epoch': 1} {'type': 'loss', 'content': 0.15794312953948975, 'timestamp': '2025-10-01 04:17:14.231600', 'step': 1251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:14.332175', 'step': 1251, 'epoch': 1} {'type': 'loss', 'content': 0.1341259479522705, 'timestamp': '2025-10-01 04:17:14.350722', 'step': 1252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:14.456183', 'step': 1252, 'epoch': 1} {'type': 'loss', 'content': 0.15441535413265228, 'timestamp': '2025-10-01 04:17:14.470675', 'step': 1253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:14.539997', 'step': 1253, 'epoch': 1} {'type': 'loss', 'content': 0.16462202370166779, 'timestamp': '2025-10-01 04:17:14.551820', 'step': 1254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:14.656923', 'step': 1254, 'epoch': 1} {'type': 'loss', 'content': 0.16087855398654938, 'timestamp': '2025-10-01 04:17:14.683065', 'step': 1255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:14.767397', 'step': 1255, 'epoch': 1} {'type': 'loss', 'content': 0.1865950971841812, 'timestamp': '2025-10-01 04:17:14.782822', 'step': 1256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:14.860324', 'step': 1256, 'epoch': 1} {'type': 'loss', 'content': 0.1717434674501419, 'timestamp': '2025-10-01 04:17:14.900127', 'step': 1257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:15.007066', 'step': 1257, 'epoch': 1} {'type': 'loss', 'content': 0.21895192563533783, 'timestamp': '2025-10-01 04:17:15.026879', 'step': 1258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:15.127074', 'step': 1258, 'epoch': 1} {'type': 'loss', 'content': 0.20354120433330536, 'timestamp': '2025-10-01 04:17:15.142599', 'step': 1259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:15.214583', 'step': 1259, 'epoch': 1} {'type': 'loss', 'content': 0.25343382358551025, 'timestamp': '2025-10-01 04:17:15.224571', 'step': 1260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:15.276956', 'step': 1260, 'epoch': 1} {'type': 'loss', 'content': 0.20923253893852234, 'timestamp': '2025-10-01 04:17:15.279000', 'step': 1261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:15.332481', 'step': 1261, 'epoch': 1} {'type': 'loss', 'content': 0.2360057830810547, 'timestamp': '2025-10-01 04:17:15.334891', 'step': 1262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:15.388113', 'step': 1262, 'epoch': 1} {'type': 'loss', 'content': 0.18186236917972565, 'timestamp': '2025-10-01 04:17:15.390722', 'step': 1263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:15.444061', 'step': 1263, 'epoch': 1} {'type': 'loss', 'content': 0.15089595317840576, 'timestamp': '2025-10-01 04:17:15.450299', 'step': 1264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:15.506716', 'step': 1264, 'epoch': 1} {'type': 'loss', 'content': 0.28173476457595825, 'timestamp': '2025-10-01 04:17:15.508678', 'step': 1265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:15.566413', 'step': 1265, 'epoch': 1} {'type': 'loss', 'content': 0.19566331803798676, 'timestamp': '2025-10-01 04:17:15.569753', 'step': 1266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:15.622738', 'step': 1266, 'epoch': 1} {'type': 'loss', 'content': 0.2523045241832733, 'timestamp': '2025-10-01 04:17:15.624596', 'step': 1267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:15.679376', 'step': 1267, 'epoch': 1} {'type': 'loss', 'content': 0.05857904255390167, 'timestamp': '2025-10-01 04:17:15.686576', 'step': 1268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:15.738976', 'step': 1268, 'epoch': 1} {'type': 'loss', 'content': 0.16460829973220825, 'timestamp': '2025-10-01 04:17:15.740857', 'step': 1269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:15.793112', 'step': 1269, 'epoch': 1} {'type': 'loss', 'content': 0.3639049828052521, 'timestamp': '2025-10-01 04:17:15.794987', 'step': 1270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:15.855557', 'step': 1270, 'epoch': 1} {'type': 'loss', 'content': 0.23185963928699493, 'timestamp': '2025-10-01 04:17:15.861385', 'step': 1271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:15.920849', 'step': 1271, 'epoch': 1} {'type': 'loss', 'content': 0.19434590637683868, 'timestamp': '2025-10-01 04:17:15.926497', 'step': 1272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:15.979081', 'step': 1272, 'epoch': 1} {'type': 'loss', 'content': 0.19893823564052582, 'timestamp': '2025-10-01 04:17:15.982272', 'step': 1273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:16.037583', 'step': 1273, 'epoch': 1} {'type': 'loss', 'content': 0.12481449544429779, 'timestamp': '2025-10-01 04:17:16.048155', 'step': 1274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:16.112932', 'step': 1274, 'epoch': 1} {'type': 'loss', 'content': 0.1195826530456543, 'timestamp': '2025-10-01 04:17:16.115283', 'step': 1275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:16.168261', 'step': 1275, 'epoch': 1} {'type': 'loss', 'content': 0.207090824842453, 'timestamp': '2025-10-01 04:17:16.174370', 'step': 1276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:16.227203', 'step': 1276, 'epoch': 1} {'type': 'loss', 'content': 0.27624908089637756, 'timestamp': '2025-10-01 04:17:16.229092', 'step': 1277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:16.282521', 'step': 1277, 'epoch': 1} {'type': 'loss', 'content': 0.16792061924934387, 'timestamp': '2025-10-01 04:17:16.284575', 'step': 1278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:16.338558', 'step': 1278, 'epoch': 1} {'type': 'loss', 'content': 0.1530066579580307, 'timestamp': '2025-10-01 04:17:16.340724', 'step': 1279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:16.394923', 'step': 1279, 'epoch': 1} {'type': 'loss', 'content': 0.19907720386981964, 'timestamp': '2025-10-01 04:17:16.400944', 'step': 1280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:16.456032', 'step': 1280, 'epoch': 1} {'type': 'loss', 'content': 0.1611708551645279, 'timestamp': '2025-10-01 04:17:16.457837', 'step': 1281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:16.518370', 'step': 1281, 'epoch': 1} {'type': 'loss', 'content': 0.14054808020591736, 'timestamp': '2025-10-01 04:17:16.520524', 'step': 1282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:16.574403', 'step': 1282, 'epoch': 1} {'type': 'loss', 'content': 0.20677495002746582, 'timestamp': '2025-10-01 04:17:16.577759', 'step': 1283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:16.629830', 'step': 1283, 'epoch': 1} {'type': 'loss', 'content': 0.1577327996492386, 'timestamp': '2025-10-01 04:17:16.636255', 'step': 1284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:16.689284', 'step': 1284, 'epoch': 1} {'type': 'loss', 'content': 0.19258321821689606, 'timestamp': '2025-10-01 04:17:16.691369', 'step': 1285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:16.750608', 'step': 1285, 'epoch': 1} {'type': 'loss', 'content': 0.23208941519260406, 'timestamp': '2025-10-01 04:17:16.752807', 'step': 1286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:16.805571', 'step': 1286, 'epoch': 1} {'type': 'loss', 'content': 0.26098543405532837, 'timestamp': '2025-10-01 04:17:16.807529', 'step': 1287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:16.860145', 'step': 1287, 'epoch': 1} {'type': 'loss', 'content': 0.1436012089252472, 'timestamp': '2025-10-01 04:17:16.866321', 'step': 1288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:16.927318', 'step': 1288, 'epoch': 1} {'type': 'loss', 'content': 0.15304376184940338, 'timestamp': '2025-10-01 04:17:16.931956', 'step': 1289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:16.988815', 'step': 1289, 'epoch': 1} {'type': 'loss', 'content': 0.19191673398017883, 'timestamp': '2025-10-01 04:17:16.991706', 'step': 1290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:17.046469', 'step': 1290, 'epoch': 1} {'type': 'loss', 'content': 0.20807625353336334, 'timestamp': '2025-10-01 04:17:17.048983', 'step': 1291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:17.101991', 'step': 1291, 'epoch': 1} {'type': 'loss', 'content': 0.18855439126491547, 'timestamp': '2025-10-01 04:17:17.108029', 'step': 1292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:17.163628', 'step': 1292, 'epoch': 1} {'type': 'loss', 'content': 0.15491895377635956, 'timestamp': '2025-10-01 04:17:17.165674', 'step': 1293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:17.218657', 'step': 1293, 'epoch': 1} {'type': 'loss', 'content': 0.15044569969177246, 'timestamp': '2025-10-01 04:17:17.225061', 'step': 1294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:17:17.280257', 'step': 1294, 'epoch': 1} {'type': 'loss', 'content': 0.22101055085659027, 'timestamp': '2025-10-01 04:17:17.282163', 'step': 1295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:17.335341', 'step': 1295, 'epoch': 1} {'type': 'loss', 'content': 0.2164909839630127, 'timestamp': '2025-10-01 04:17:17.341048', 'step': 1296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:17.394902', 'step': 1296, 'epoch': 1} {'type': 'loss', 'content': 0.1500464826822281, 'timestamp': '2025-10-01 04:17:17.397776', 'step': 1297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:17.451424', 'step': 1297, 'epoch': 1} {'type': 'loss', 'content': 0.18048126995563507, 'timestamp': '2025-10-01 04:17:17.453546', 'step': 1298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:17.506974', 'step': 1298, 'epoch': 1} {'type': 'loss', 'content': 0.1819663792848587, 'timestamp': '2025-10-01 04:17:17.509143', 'step': 1299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:17.564687', 'step': 1299, 'epoch': 1} {'type': 'loss', 'content': 0.16663308441638947, 'timestamp': '2025-10-01 04:17:17.581256', 'step': 1300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:17.644588', 'step': 1300, 'epoch': 1} {'type': 'loss', 'content': 0.26490044593811035, 'timestamp': '2025-10-01 04:17:17.647132', 'step': 1301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:17.701318', 'step': 1301, 'epoch': 1} {'type': 'loss', 'content': 0.20044933259487152, 'timestamp': '2025-10-01 04:17:17.703477', 'step': 1302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:17.757109', 'step': 1302, 'epoch': 1} {'type': 'loss', 'content': 0.2610425055027008, 'timestamp': '2025-10-01 04:17:17.767125', 'step': 1303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:17.821233', 'step': 1303, 'epoch': 1} {'type': 'loss', 'content': 0.19198116660118103, 'timestamp': '2025-10-01 04:17:17.827456', 'step': 1304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:17.884661', 'step': 1304, 'epoch': 1} {'type': 'loss', 'content': 0.1743948608636856, 'timestamp': '2025-10-01 04:17:17.897145', 'step': 1305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:17.951410', 'step': 1305, 'epoch': 1} {'type': 'loss', 'content': 0.3320205509662628, 'timestamp': '2025-10-01 04:17:17.953903', 'step': 1306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:18.012565', 'step': 1306, 'epoch': 1} {'type': 'loss', 'content': 0.1827024221420288, 'timestamp': '2025-10-01 04:17:18.015455', 'step': 1307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:18.068626', 'step': 1307, 'epoch': 1} {'type': 'loss', 'content': 0.2580889165401459, 'timestamp': '2025-10-01 04:17:18.074348', 'step': 1308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:18.128980', 'step': 1308, 'epoch': 1} {'type': 'loss', 'content': 0.15594534575939178, 'timestamp': '2025-10-01 04:17:18.131303', 'step': 1309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:18.184213', 'step': 1309, 'epoch': 1} {'type': 'loss', 'content': 0.1009395495057106, 'timestamp': '2025-10-01 04:17:18.186426', 'step': 1310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:18.239759', 'step': 1310, 'epoch': 1} {'type': 'loss', 'content': 0.1579870730638504, 'timestamp': '2025-10-01 04:17:18.241881', 'step': 1311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:17:18.295024', 'step': 1311, 'epoch': 1} {'type': 'loss', 'content': 0.15467293560504913, 'timestamp': '2025-10-01 04:17:18.301130', 'step': 1312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:18.354495', 'step': 1312, 'epoch': 1} {'type': 'loss', 'content': 0.20554810762405396, 'timestamp': '2025-10-01 04:17:18.356785', 'step': 1313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:18.413579', 'step': 1313, 'epoch': 1} {'type': 'loss', 'content': 0.17044004797935486, 'timestamp': '2025-10-01 04:17:18.415614', 'step': 1314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:18.469252', 'step': 1314, 'epoch': 1} {'type': 'loss', 'content': 0.21393169462680817, 'timestamp': '2025-10-01 04:17:18.471268', 'step': 1315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:18.524615', 'step': 1315, 'epoch': 1} {'type': 'loss', 'content': 0.22886712849140167, 'timestamp': '2025-10-01 04:17:18.530557', 'step': 1316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:18.594521', 'step': 1316, 'epoch': 1} {'type': 'loss', 'content': 0.16248969733715057, 'timestamp': '2025-10-01 04:17:18.597465', 'step': 1317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:18.653096', 'step': 1317, 'epoch': 1} {'type': 'loss', 'content': 0.14593690633773804, 'timestamp': '2025-10-01 04:17:18.656672', 'step': 1318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:18.712774', 'step': 1318, 'epoch': 1} {'type': 'loss', 'content': 0.18616139888763428, 'timestamp': '2025-10-01 04:17:18.715139', 'step': 1319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:18.768957', 'step': 1319, 'epoch': 1} {'type': 'loss', 'content': 0.1042671650648117, 'timestamp': '2025-10-01 04:17:18.776847', 'step': 1320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:18.830832', 'step': 1320, 'epoch': 1} {'type': 'loss', 'content': 0.19692982733249664, 'timestamp': '2025-10-01 04:17:18.833227', 'step': 1321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:17:18.888116', 'step': 1321, 'epoch': 1} {'type': 'loss', 'content': 0.12170109897851944, 'timestamp': '2025-10-01 04:17:18.890030', 'step': 1322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:18.943661', 'step': 1322, 'epoch': 1} {'type': 'loss', 'content': 0.116350457072258, 'timestamp': '2025-10-01 04:17:18.945615', 'step': 1323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:19.000558', 'step': 1323, 'epoch': 1} {'type': 'loss', 'content': 0.1419239640235901, 'timestamp': '2025-10-01 04:17:19.006236', 'step': 1324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:19.058381', 'step': 1324, 'epoch': 1} {'type': 'loss', 'content': 0.2941035032272339, 'timestamp': '2025-10-01 04:17:19.060366', 'step': 1325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:19.115241', 'step': 1325, 'epoch': 1} {'type': 'loss', 'content': 0.09860971570014954, 'timestamp': '2025-10-01 04:17:19.120409', 'step': 1326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:19.181730', 'step': 1326, 'epoch': 1} {'type': 'loss', 'content': 0.17912191152572632, 'timestamp': '2025-10-01 04:17:19.183458', 'step': 1327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:19.237255', 'step': 1327, 'epoch': 1} {'type': 'loss', 'content': 0.1300925463438034, 'timestamp': '2025-10-01 04:17:19.252257', 'step': 1328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:19.304958', 'step': 1328, 'epoch': 1} {'type': 'loss', 'content': 0.1464315801858902, 'timestamp': '2025-10-01 04:17:19.307559', 'step': 1329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:19.360166', 'step': 1329, 'epoch': 1} {'type': 'loss', 'content': 0.12121683359146118, 'timestamp': '2025-10-01 04:17:19.362009', 'step': 1330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:19.414872', 'step': 1330, 'epoch': 1} {'type': 'loss', 'content': 0.1548977643251419, 'timestamp': '2025-10-01 04:17:19.416940', 'step': 1331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:19.469274', 'step': 1331, 'epoch': 1} {'type': 'loss', 'content': 0.25336411595344543, 'timestamp': '2025-10-01 04:17:19.475953', 'step': 1332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:19.529474', 'step': 1332, 'epoch': 1} {'type': 'loss', 'content': 0.18634481728076935, 'timestamp': '2025-10-01 04:17:19.531352', 'step': 1333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:17:19.592394', 'step': 1333, 'epoch': 1} {'type': 'loss', 'content': 0.2602296769618988, 'timestamp': '2025-10-01 04:17:19.595784', 'step': 1334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:19.649350', 'step': 1334, 'epoch': 1} {'type': 'loss', 'content': 0.1919805258512497, 'timestamp': '2025-10-01 04:17:19.651919', 'step': 1335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:19.718898', 'step': 1335, 'epoch': 1} {'type': 'loss', 'content': 0.17642875015735626, 'timestamp': '2025-10-01 04:17:19.724394', 'step': 1336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:19.780677', 'step': 1336, 'epoch': 1} {'type': 'loss', 'content': 0.10240328311920166, 'timestamp': '2025-10-01 04:17:19.783219', 'step': 1337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:19.836530', 'step': 1337, 'epoch': 1} {'type': 'loss', 'content': 0.11672165989875793, 'timestamp': '2025-10-01 04:17:19.838455', 'step': 1338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:19.891258', 'step': 1338, 'epoch': 1} {'type': 'loss', 'content': 0.15686921775341034, 'timestamp': '2025-10-01 04:17:19.893324', 'step': 1339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:19.946147', 'step': 1339, 'epoch': 1} {'type': 'loss', 'content': 0.23376937210559845, 'timestamp': '2025-10-01 04:17:19.951948', 'step': 1340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:20.005416', 'step': 1340, 'epoch': 1} {'type': 'loss', 'content': 0.19372399151325226, 'timestamp': '2025-10-01 04:17:20.007682', 'step': 1341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:20.060994', 'step': 1341, 'epoch': 1} {'type': 'loss', 'content': 0.21670563519001007, 'timestamp': '2025-10-01 04:17:20.063111', 'step': 1342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:20.118769', 'step': 1342, 'epoch': 1} {'type': 'loss', 'content': 0.138215109705925, 'timestamp': '2025-10-01 04:17:20.130061', 'step': 1343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:20.197342', 'step': 1343, 'epoch': 1} {'type': 'loss', 'content': 0.12257983535528183, 'timestamp': '2025-10-01 04:17:20.202913', 'step': 1344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:20.255433', 'step': 1344, 'epoch': 1} {'type': 'loss', 'content': 0.2823459506034851, 'timestamp': '2025-10-01 04:17:20.257671', 'step': 1345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:20.310416', 'step': 1345, 'epoch': 1} {'type': 'loss', 'content': 0.19773143529891968, 'timestamp': '2025-10-01 04:17:20.312354', 'step': 1346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:20.374939', 'step': 1346, 'epoch': 1} {'type': 'loss', 'content': 0.12277215719223022, 'timestamp': '2025-10-01 04:17:20.376712', 'step': 1347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:20.429765', 'step': 1347, 'epoch': 1} {'type': 'loss', 'content': 0.2229674756526947, 'timestamp': '2025-10-01 04:17:20.435377', 'step': 1348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:20.487939', 'step': 1348, 'epoch': 1} {'type': 'loss', 'content': 0.2166852504014969, 'timestamp': '2025-10-01 04:17:20.490131', 'step': 1349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:20.543338', 'step': 1349, 'epoch': 1} {'type': 'loss', 'content': 0.1797093152999878, 'timestamp': '2025-10-01 04:17:20.545856', 'step': 1350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:20.598307', 'step': 1350, 'epoch': 1} {'type': 'loss', 'content': 0.126521497964859, 'timestamp': '2025-10-01 04:17:20.600469', 'step': 1351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:20.652718', 'step': 1351, 'epoch': 1} {'type': 'loss', 'content': 0.15275146067142487, 'timestamp': '2025-10-01 04:17:20.658166', 'step': 1352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:20.720607', 'step': 1352, 'epoch': 1} {'type': 'loss', 'content': 0.22704435884952545, 'timestamp': '2025-10-01 04:17:20.722457', 'step': 1353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:20.775985', 'step': 1353, 'epoch': 1} {'type': 'loss', 'content': 0.14941127598285675, 'timestamp': '2025-10-01 04:17:20.778265', 'step': 1354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:20.841200', 'step': 1354, 'epoch': 1} {'type': 'loss', 'content': 0.28964850306510925, 'timestamp': '2025-10-01 04:17:20.843132', 'step': 1355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:20.895363', 'step': 1355, 'epoch': 1} {'type': 'loss', 'content': 0.140278622508049, 'timestamp': '2025-10-01 04:17:20.900831', 'step': 1356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:20.953010', 'step': 1356, 'epoch': 1} {'type': 'loss', 'content': 0.21585911512374878, 'timestamp': '2025-10-01 04:17:20.959465', 'step': 1357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:21.013204', 'step': 1357, 'epoch': 1} {'type': 'loss', 'content': 0.2923157811164856, 'timestamp': '2025-10-01 04:17:21.014827', 'step': 1358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:21.071470', 'step': 1358, 'epoch': 1} {'type': 'loss', 'content': 0.2352583110332489, 'timestamp': '2025-10-01 04:17:21.074084', 'step': 1359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:21.131370', 'step': 1359, 'epoch': 1} {'type': 'loss', 'content': 0.20144571363925934, 'timestamp': '2025-10-01 04:17:21.136684', 'step': 1360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:21.188965', 'step': 1360, 'epoch': 1} {'type': 'loss', 'content': 0.21759121119976044, 'timestamp': '2025-10-01 04:17:21.191261', 'step': 1361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:21.246126', 'step': 1361, 'epoch': 1} {'type': 'loss', 'content': 0.1103275790810585, 'timestamp': '2025-10-01 04:17:21.248088', 'step': 1362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:17:21.301034', 'step': 1362, 'epoch': 1} {'type': 'loss', 'content': 0.12866950035095215, 'timestamp': '2025-10-01 04:17:21.303398', 'step': 1363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:21.365491', 'step': 1363, 'epoch': 1} {'type': 'loss', 'content': 0.21431583166122437, 'timestamp': '2025-10-01 04:17:21.371413', 'step': 1364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:21.426218', 'step': 1364, 'epoch': 1} {'type': 'loss', 'content': 0.09010137617588043, 'timestamp': '2025-10-01 04:17:21.427832', 'step': 1365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:17:21.480306', 'step': 1365, 'epoch': 1} {'type': 'loss', 'content': 0.1472504436969757, 'timestamp': '2025-10-01 04:17:21.483212', 'step': 1366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:21.545995', 'step': 1366, 'epoch': 1} {'type': 'loss', 'content': 0.20457936823368073, 'timestamp': '2025-10-01 04:17:21.559250', 'step': 1367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:21.612147', 'step': 1367, 'epoch': 1} {'type': 'loss', 'content': 0.09374883770942688, 'timestamp': '2025-10-01 04:17:21.617732', 'step': 1368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:21.704152', 'step': 1368, 'epoch': 1} {'type': 'loss', 'content': 0.20415303111076355, 'timestamp': '2025-10-01 04:17:21.706257', 'step': 1369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:21.758925', 'step': 1369, 'epoch': 1} {'type': 'loss', 'content': 0.18559706211090088, 'timestamp': '2025-10-01 04:17:21.761032', 'step': 1370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:21.822668', 'step': 1370, 'epoch': 1} {'type': 'loss', 'content': 0.21573705971240997, 'timestamp': '2025-10-01 04:17:21.824609', 'step': 1371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:21.899309', 'step': 1371, 'epoch': 1} {'type': 'loss', 'content': 0.12430191785097122, 'timestamp': '2025-10-01 04:17:21.904569', 'step': 1372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:21.958336', 'step': 1372, 'epoch': 1} {'type': 'loss', 'content': 0.07560836523771286, 'timestamp': '2025-10-01 04:17:21.962379', 'step': 1373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:22.019414', 'step': 1373, 'epoch': 1} {'type': 'loss', 'content': 0.22710531949996948, 'timestamp': '2025-10-01 04:17:22.021384', 'step': 1374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:17:22.073971', 'step': 1374, 'epoch': 1} {'type': 'loss', 'content': 0.14858068525791168, 'timestamp': '2025-10-01 04:17:22.076007', 'step': 1375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:22.132541', 'step': 1375, 'epoch': 1} {'type': 'loss', 'content': 0.2573917508125305, 'timestamp': '2025-10-01 04:17:22.138167', 'step': 1376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:17:22.190791', 'step': 1376, 'epoch': 1} {'type': 'loss', 'content': 0.1659996211528778, 'timestamp': '2025-10-01 04:17:22.192785', 'step': 1377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:22.245483', 'step': 1377, 'epoch': 1} {'type': 'loss', 'content': 0.22026081383228302, 'timestamp': '2025-10-01 04:17:22.247240', 'step': 1378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:22.300965', 'step': 1378, 'epoch': 1} {'type': 'loss', 'content': 0.14852020144462585, 'timestamp': '2025-10-01 04:17:22.302918', 'step': 1379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:22.357387', 'step': 1379, 'epoch': 1} {'type': 'loss', 'content': 0.15756726264953613, 'timestamp': '2025-10-01 04:17:22.362764', 'step': 1380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:22.416713', 'step': 1380, 'epoch': 1} {'type': 'loss', 'content': 0.17480657994747162, 'timestamp': '2025-10-01 04:17:22.428020', 'step': 1381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:22.490540', 'step': 1381, 'epoch': 1} {'type': 'loss', 'content': 0.14803235232830048, 'timestamp': '2025-10-01 04:17:22.492499', 'step': 1382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:22.545966', 'step': 1382, 'epoch': 1} {'type': 'loss', 'content': 0.22953125834465027, 'timestamp': '2025-10-01 04:17:22.547779', 'step': 1383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:22.600261', 'step': 1383, 'epoch': 1} {'type': 'loss', 'content': 0.16763782501220703, 'timestamp': '2025-10-01 04:17:22.606615', 'step': 1384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:22.678630', 'step': 1384, 'epoch': 1} {'type': 'loss', 'content': 0.21685031056404114, 'timestamp': '2025-10-01 04:17:22.680435', 'step': 1385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:22.732877', 'step': 1385, 'epoch': 1} {'type': 'loss', 'content': 0.18328802287578583, 'timestamp': '2025-10-01 04:17:22.734553', 'step': 1386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:22.799713', 'step': 1386, 'epoch': 1} {'type': 'loss', 'content': 0.15837223827838898, 'timestamp': '2025-10-01 04:17:22.801736', 'step': 1387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:22.854570', 'step': 1387, 'epoch': 1} {'type': 'loss', 'content': 0.1648263931274414, 'timestamp': '2025-10-01 04:17:22.860340', 'step': 1388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:22.921507', 'step': 1388, 'epoch': 1} {'type': 'loss', 'content': 0.20203635096549988, 'timestamp': '2025-10-01 04:17:22.923479', 'step': 1389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:22.986045', 'step': 1389, 'epoch': 1} {'type': 'loss', 'content': 0.13888882100582123, 'timestamp': '2025-10-01 04:17:22.988062', 'step': 1390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:23.055068', 'step': 1390, 'epoch': 1} {'type': 'loss', 'content': 0.1904633492231369, 'timestamp': '2025-10-01 04:17:23.061600', 'step': 1391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:17:23.116023', 'step': 1391, 'epoch': 1} {'type': 'loss', 'content': 0.07884777337312698, 'timestamp': '2025-10-01 04:17:23.123211', 'step': 1392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:23.176816', 'step': 1392, 'epoch': 1} {'type': 'loss', 'content': 0.23384547233581543, 'timestamp': '2025-10-01 04:17:23.180157', 'step': 1393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:23.232950', 'step': 1393, 'epoch': 1} {'type': 'loss', 'content': 0.17238371074199677, 'timestamp': '2025-10-01 04:17:23.235425', 'step': 1394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:23.290812', 'step': 1394, 'epoch': 1} {'type': 'loss', 'content': 0.12743452191352844, 'timestamp': '2025-10-01 04:17:23.304382', 'step': 1395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:23.357695', 'step': 1395, 'epoch': 1} {'type': 'loss', 'content': 0.25392135977745056, 'timestamp': '2025-10-01 04:17:23.363316', 'step': 1396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:23.415978', 'step': 1396, 'epoch': 1} {'type': 'loss', 'content': 0.20270779728889465, 'timestamp': '2025-10-01 04:17:23.423765', 'step': 1397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:23.477526', 'step': 1397, 'epoch': 1} {'type': 'loss', 'content': 0.22185051441192627, 'timestamp': '2025-10-01 04:17:23.479280', 'step': 1398, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-01 04:17:36.677945', 'step': 1398, 'epoch': 1} {'type': 'pplx', 'content': 9904.757970953271, 'timestamp': '2025-10-01 04:17:36.680577', 'step': 1398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:36.734272', 'step': 1398, 'epoch': 1} {'type': 'loss', 'content': 0.2464832365512848, 'timestamp': '2025-10-01 04:17:36.736175', 'step': 1399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:36.789522', 'step': 1399, 'epoch': 1} {'type': 'loss', 'content': 0.15000294148921967, 'timestamp': '2025-10-01 04:17:36.795378', 'step': 1400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:17:36.847913', 'step': 1400, 'epoch': 1} {'type': 'loss', 'content': 0.21226318180561066, 'timestamp': '2025-10-01 04:17:36.850061', 'step': 1401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:36.902984', 'step': 1401, 'epoch': 1} {'type': 'loss', 'content': 0.20150285959243774, 'timestamp': '2025-10-01 04:17:36.905114', 'step': 1402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:17:36.963465', 'step': 1402, 'epoch': 1} {'type': 'loss', 'content': 0.17461273074150085, 'timestamp': '2025-10-01 04:17:36.965314', 'step': 1403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:37.018412', 'step': 1403, 'epoch': 1} {'type': 'loss', 'content': 0.18090654909610748, 'timestamp': '2025-10-01 04:17:37.024166', 'step': 1404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:37.077922', 'step': 1404, 'epoch': 1} {'type': 'loss', 'content': 0.1680075079202652, 'timestamp': '2025-10-01 04:17:37.079903', 'step': 1405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:17:37.133385', 'step': 1405, 'epoch': 1} {'type': 'loss', 'content': 0.16007167100906372, 'timestamp': '2025-10-01 04:17:37.135449', 'step': 1406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:37.188159', 'step': 1406, 'epoch': 1} {'type': 'loss', 'content': 0.1442704051733017, 'timestamp': '2025-10-01 04:17:37.190129', 'step': 1407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:37.242287', 'step': 1407, 'epoch': 1} {'type': 'loss', 'content': 0.2678883373737335, 'timestamp': '2025-10-01 04:17:37.248718', 'step': 1408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:37.302181', 'step': 1408, 'epoch': 1} {'type': 'loss', 'content': 0.15277564525604248, 'timestamp': '2025-10-01 04:17:37.304235', 'step': 1409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:37.363932', 'step': 1409, 'epoch': 1} {'type': 'loss', 'content': 0.17935457825660706, 'timestamp': '2025-10-01 04:17:37.365810', 'step': 1410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:37.419496', 'step': 1410, 'epoch': 1} {'type': 'loss', 'content': 0.17615722119808197, 'timestamp': '2025-10-01 04:17:37.421367', 'step': 1411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:37.474469', 'step': 1411, 'epoch': 1} {'type': 'loss', 'content': 0.12125353515148163, 'timestamp': '2025-10-01 04:17:37.479996', 'step': 1412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:37.539870', 'step': 1412, 'epoch': 1} {'type': 'loss', 'content': 0.22346660494804382, 'timestamp': '2025-10-01 04:17:37.541832', 'step': 1413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:37.593992', 'step': 1413, 'epoch': 1} {'type': 'loss', 'content': 0.197279155254364, 'timestamp': '2025-10-01 04:17:37.595931', 'step': 1414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:37.648575', 'step': 1414, 'epoch': 1} {'type': 'loss', 'content': 0.15440109372138977, 'timestamp': '2025-10-01 04:17:37.650755', 'step': 1415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:37.704254', 'step': 1415, 'epoch': 1} {'type': 'loss', 'content': 0.10829529911279678, 'timestamp': '2025-10-01 04:17:37.709940', 'step': 1416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:37.766378', 'step': 1416, 'epoch': 1} {'type': 'loss', 'content': 0.27831047773361206, 'timestamp': '2025-10-01 04:17:37.768558', 'step': 1417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:17:37.821600', 'step': 1417, 'epoch': 1} {'type': 'loss', 'content': 0.1916661113500595, 'timestamp': '2025-10-01 04:17:37.823764', 'step': 1418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:37.876927', 'step': 1418, 'epoch': 1} {'type': 'loss', 'content': 0.22957226634025574, 'timestamp': '2025-10-01 04:17:37.879005', 'step': 1419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:17:37.934035', 'step': 1419, 'epoch': 1} {'type': 'loss', 'content': 0.269522100687027, 'timestamp': '2025-10-01 04:17:37.939414', 'step': 1420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:37.994498', 'step': 1420, 'epoch': 1} {'type': 'loss', 'content': 0.15451177954673767, 'timestamp': '2025-10-01 04:17:37.996464', 'step': 1421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:17:38.058422', 'step': 1421, 'epoch': 1} {'type': 'loss', 'content': 0.17675743997097015, 'timestamp': '2025-10-01 04:17:38.060460', 'step': 1422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:38.113881', 'step': 1422, 'epoch': 1} {'type': 'loss', 'content': 0.13659991323947906, 'timestamp': '2025-10-01 04:17:38.116210', 'step': 1423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:38.174686', 'step': 1423, 'epoch': 1} {'type': 'loss', 'content': 0.2290656715631485, 'timestamp': '2025-10-01 04:17:38.180456', 'step': 1424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:38.243099', 'step': 1424, 'epoch': 1} {'type': 'loss', 'content': 0.15283706784248352, 'timestamp': '2025-10-01 04:17:38.244892', 'step': 1425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:38.297095', 'step': 1425, 'epoch': 1} {'type': 'loss', 'content': 0.2730821669101715, 'timestamp': '2025-10-01 04:17:38.299358', 'step': 1426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:17:38.354652', 'step': 1426, 'epoch': 1} {'type': 'loss', 'content': 0.19189158082008362, 'timestamp': '2025-10-01 04:17:38.357039', 'step': 1427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:38.409734', 'step': 1427, 'epoch': 1} {'type': 'loss', 'content': 0.17773102223873138, 'timestamp': '2025-10-01 04:17:38.415103', 'step': 1428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:38.467541', 'step': 1428, 'epoch': 1} {'type': 'loss', 'content': 0.16983391344547272, 'timestamp': '2025-10-01 04:17:38.469421', 'step': 1429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:38.523507', 'step': 1429, 'epoch': 1} {'type': 'loss', 'content': 0.1907898187637329, 'timestamp': '2025-10-01 04:17:38.526679', 'step': 1430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:38.580362', 'step': 1430, 'epoch': 1} {'type': 'loss', 'content': 0.2624709904193878, 'timestamp': '2025-10-01 04:17:38.582538', 'step': 1431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:17:38.635860', 'step': 1431, 'epoch': 1} {'type': 'loss', 'content': 0.21269504725933075, 'timestamp': '2025-10-01 04:17:38.641465', 'step': 1432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:38.694677', 'step': 1432, 'epoch': 1} {'type': 'loss', 'content': 0.14438508450984955, 'timestamp': '2025-10-01 04:17:38.700172', 'step': 1433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:38.755137', 'step': 1433, 'epoch': 1} {'type': 'loss', 'content': 0.2896408438682556, 'timestamp': '2025-10-01 04:17:38.757183', 'step': 1434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:38.811122', 'step': 1434, 'epoch': 1} {'type': 'loss', 'content': 0.12253609299659729, 'timestamp': '2025-10-01 04:17:38.813179', 'step': 1435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:38.866173', 'step': 1435, 'epoch': 1} {'type': 'loss', 'content': 0.1565152108669281, 'timestamp': '2025-10-01 04:17:38.875211', 'step': 1436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:38.927539', 'step': 1436, 'epoch': 1} {'type': 'loss', 'content': 0.22766175866127014, 'timestamp': '2025-10-01 04:17:38.929611', 'step': 1437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:38.983032', 'step': 1437, 'epoch': 1} {'type': 'loss', 'content': 0.17660973966121674, 'timestamp': '2025-10-01 04:17:38.984954', 'step': 1438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:17:39.038172', 'step': 1438, 'epoch': 1} {'type': 'loss', 'content': 0.20580261945724487, 'timestamp': '2025-10-01 04:17:39.040151', 'step': 1439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:39.093636', 'step': 1439, 'epoch': 1} {'type': 'loss', 'content': 0.21238760650157928, 'timestamp': '2025-10-01 04:17:39.099690', 'step': 1440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:39.153700', 'step': 1440, 'epoch': 1} {'type': 'loss', 'content': 0.146775484085083, 'timestamp': '2025-10-01 04:17:39.155716', 'step': 1441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:39.209430', 'step': 1441, 'epoch': 1} {'type': 'loss', 'content': 0.24167269468307495, 'timestamp': '2025-10-01 04:17:39.211590', 'step': 1442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:39.269420', 'step': 1442, 'epoch': 1} {'type': 'loss', 'content': 0.18095695972442627, 'timestamp': '2025-10-01 04:17:39.271976', 'step': 1443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:39.324671', 'step': 1443, 'epoch': 1} {'type': 'loss', 'content': 0.2619170844554901, 'timestamp': '2025-10-01 04:17:39.333015', 'step': 1444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:39.391116', 'step': 1444, 'epoch': 1} {'type': 'loss', 'content': 0.3302091360092163, 'timestamp': '2025-10-01 04:17:39.393498', 'step': 1445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:39.448425', 'step': 1445, 'epoch': 1} {'type': 'loss', 'content': 0.1283191293478012, 'timestamp': '2025-10-01 04:17:39.450646', 'step': 1446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:39.504469', 'step': 1446, 'epoch': 1} {'type': 'loss', 'content': 0.17505760490894318, 'timestamp': '2025-10-01 04:17:39.507013', 'step': 1447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:39.561867', 'step': 1447, 'epoch': 1} {'type': 'loss', 'content': 0.2545992434024811, 'timestamp': '2025-10-01 04:17:39.567946', 'step': 1448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:39.620196', 'step': 1448, 'epoch': 1} {'type': 'loss', 'content': 0.16702763736248016, 'timestamp': '2025-10-01 04:17:39.622155', 'step': 1449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:39.674473', 'step': 1449, 'epoch': 1} {'type': 'loss', 'content': 0.20604315400123596, 'timestamp': '2025-10-01 04:17:39.676463', 'step': 1450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:39.730095', 'step': 1450, 'epoch': 1} {'type': 'loss', 'content': 0.24317073822021484, 'timestamp': '2025-10-01 04:17:39.732628', 'step': 1451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:39.787212', 'step': 1451, 'epoch': 1} {'type': 'loss', 'content': 0.1701624095439911, 'timestamp': '2025-10-01 04:17:39.792864', 'step': 1452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:39.846108', 'step': 1452, 'epoch': 1} {'type': 'loss', 'content': 0.15720802545547485, 'timestamp': '2025-10-01 04:17:39.848867', 'step': 1453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:39.907194', 'step': 1453, 'epoch': 1} {'type': 'loss', 'content': 0.09922988712787628, 'timestamp': '2025-10-01 04:17:39.916132', 'step': 1454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:39.968847', 'step': 1454, 'epoch': 1} {'type': 'loss', 'content': 0.25362321734428406, 'timestamp': '2025-10-01 04:17:39.970665', 'step': 1455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:40.036207', 'step': 1455, 'epoch': 1} {'type': 'loss', 'content': 0.12004309892654419, 'timestamp': '2025-10-01 04:17:40.041921', 'step': 1456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:40.110376', 'step': 1456, 'epoch': 1} {'type': 'loss', 'content': 0.1654510349035263, 'timestamp': '2025-10-01 04:17:40.112466', 'step': 1457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:40.166036', 'step': 1457, 'epoch': 1} {'type': 'loss', 'content': 0.141622856259346, 'timestamp': '2025-10-01 04:17:40.168184', 'step': 1458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:40.221629', 'step': 1458, 'epoch': 1} {'type': 'loss', 'content': 0.18108341097831726, 'timestamp': '2025-10-01 04:17:40.223784', 'step': 1459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:40.289207', 'step': 1459, 'epoch': 1} {'type': 'loss', 'content': 0.13383503258228302, 'timestamp': '2025-10-01 04:17:40.294998', 'step': 1460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:40.356524', 'step': 1460, 'epoch': 1} {'type': 'loss', 'content': 0.2263341248035431, 'timestamp': '2025-10-01 04:17:40.360097', 'step': 1461, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:40.413497', 'step': 1461, 'epoch': 1} {'type': 'loss', 'content': 0.19676584005355835, 'timestamp': '2025-10-01 04:17:40.415655', 'step': 1462, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:40.475486', 'step': 1462, 'epoch': 1} {'type': 'loss', 'content': 0.2802685797214508, 'timestamp': '2025-10-01 04:17:40.487024', 'step': 1463, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:40.539793', 'step': 1463, 'epoch': 1} {'type': 'loss', 'content': 0.23231393098831177, 'timestamp': '2025-10-01 04:17:40.545386', 'step': 1464, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:40.598242', 'step': 1464, 'epoch': 1} {'type': 'loss', 'content': 0.21781480312347412, 'timestamp': '2025-10-01 04:17:40.600207', 'step': 1465, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:40.654157', 'step': 1465, 'epoch': 1} {'type': 'loss', 'content': 0.1696922481060028, 'timestamp': '2025-10-01 04:17:40.656162', 'step': 1466, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:40.710294', 'step': 1466, 'epoch': 1} {'type': 'loss', 'content': 0.13537436723709106, 'timestamp': '2025-10-01 04:17:40.712554', 'step': 1467, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:40.770198', 'step': 1467, 'epoch': 1} {'type': 'loss', 'content': 0.24821491539478302, 'timestamp': '2025-10-01 04:17:40.775679', 'step': 1468, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:40.828857', 'step': 1468, 'epoch': 1} {'type': 'loss', 'content': 0.29901430010795593, 'timestamp': '2025-10-01 04:17:40.830816', 'step': 1469, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:40.884000', 'step': 1469, 'epoch': 1} {'type': 'loss', 'content': 0.24436646699905396, 'timestamp': '2025-10-01 04:17:40.886087', 'step': 1470, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:40.938898', 'step': 1470, 'epoch': 1} {'type': 'loss', 'content': 0.1498079001903534, 'timestamp': '2025-10-01 04:17:40.940928', 'step': 1471, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:40.995459', 'step': 1471, 'epoch': 1} {'type': 'loss', 'content': 0.13882508873939514, 'timestamp': '2025-10-01 04:17:41.001365', 'step': 1472, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:41.053768', 'step': 1472, 'epoch': 1} {'type': 'loss', 'content': 0.14239391684532166, 'timestamp': '2025-10-01 04:17:41.056107', 'step': 1473, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:41.108887', 'step': 1473, 'epoch': 1} {'type': 'loss', 'content': 0.2064037322998047, 'timestamp': '2025-10-01 04:17:41.111436', 'step': 1474, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:41.164936', 'step': 1474, 'epoch': 1} {'type': 'loss', 'content': 0.14016900956630707, 'timestamp': '2025-10-01 04:17:41.167893', 'step': 1475, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:41.220893', 'step': 1475, 'epoch': 1} {'type': 'loss', 'content': 0.15894612669944763, 'timestamp': '2025-10-01 04:17:41.226577', 'step': 1476, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:41.279087', 'step': 1476, 'epoch': 1} {'type': 'loss', 'content': 0.24490559101104736, 'timestamp': '2025-10-01 04:17:41.281101', 'step': 1477, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:41.333586', 'step': 1477, 'epoch': 1} {'type': 'loss', 'content': 0.07689575850963593, 'timestamp': '2025-10-01 04:17:41.335676', 'step': 1478, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:41.389374', 'step': 1478, 'epoch': 1} {'type': 'loss', 'content': 0.2441939264535904, 'timestamp': '2025-10-01 04:17:41.392732', 'step': 1479, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:41.445955', 'step': 1479, 'epoch': 1} {'type': 'loss', 'content': 0.19629791378974915, 'timestamp': '2025-10-01 04:17:41.451633', 'step': 1480, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:41.504349', 'step': 1480, 'epoch': 1} {'type': 'loss', 'content': 0.11272943019866943, 'timestamp': '2025-10-01 04:17:41.506834', 'step': 1481, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:41.561329', 'step': 1481, 'epoch': 1} {'type': 'loss', 'content': 0.1881362795829773, 'timestamp': '2025-10-01 04:17:41.569944', 'step': 1482, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:41.622536', 'step': 1482, 'epoch': 1} {'type': 'loss', 'content': 0.18500837683677673, 'timestamp': '2025-10-01 04:17:41.624625', 'step': 1483, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:41.677984', 'step': 1483, 'epoch': 1} {'type': 'loss', 'content': 0.1430235207080841, 'timestamp': '2025-10-01 04:17:41.683525', 'step': 1484, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:41.735728', 'step': 1484, 'epoch': 1} {'type': 'loss', 'content': 0.24600794911384583, 'timestamp': '2025-10-01 04:17:41.738577', 'step': 1485, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:41.792545', 'step': 1485, 'epoch': 1} {'type': 'loss', 'content': 0.1917552947998047, 'timestamp': '2025-10-01 04:17:41.794550', 'step': 1486, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:41.847893', 'step': 1486, 'epoch': 1} {'type': 'loss', 'content': 0.13789832592010498, 'timestamp': '2025-10-01 04:17:41.850828', 'step': 1487, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:41.903641', 'step': 1487, 'epoch': 1} {'type': 'loss', 'content': 0.16883306205272675, 'timestamp': '2025-10-01 04:17:41.911666', 'step': 1488, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:17:41.966618', 'step': 1488, 'epoch': 1} {'type': 'loss', 'content': 0.19788090884685516, 'timestamp': '2025-10-01 04:17:41.969161', 'step': 1489, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:42.021704', 'step': 1489, 'epoch': 1} {'type': 'loss', 'content': 0.2170107215642929, 'timestamp': '2025-10-01 04:17:42.024598', 'step': 1490, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:42.078192', 'step': 1490, 'epoch': 1} {'type': 'loss', 'content': 0.1764814704656601, 'timestamp': '2025-10-01 04:17:42.080817', 'step': 1491, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:42.134501', 'step': 1491, 'epoch': 1} {'type': 'loss', 'content': 0.18718452751636505, 'timestamp': '2025-10-01 04:17:42.140459', 'step': 1492, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:42.193587', 'step': 1492, 'epoch': 1} {'type': 'loss', 'content': 0.25036993622779846, 'timestamp': '2025-10-01 04:17:42.195807', 'step': 1493, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:42.249201', 'step': 1493, 'epoch': 1} {'type': 'loss', 'content': 0.15787100791931152, 'timestamp': '2025-10-01 04:17:42.251358', 'step': 1494, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:42.305220', 'step': 1494, 'epoch': 1} {'type': 'loss', 'content': 0.20824795961380005, 'timestamp': '2025-10-01 04:17:42.307349', 'step': 1495, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:42.362797', 'step': 1495, 'epoch': 1} {'type': 'loss', 'content': 0.3292136490345001, 'timestamp': '2025-10-01 04:17:42.368577', 'step': 1496, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:42.422391', 'step': 1496, 'epoch': 1} {'type': 'loss', 'content': 0.1825903058052063, 'timestamp': '2025-10-01 04:17:42.432229', 'step': 1497, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:42.487018', 'step': 1497, 'epoch': 1} {'type': 'loss', 'content': 0.14431002736091614, 'timestamp': '2025-10-01 04:17:42.489124', 'step': 1498, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:17:42.544759', 'step': 1498, 'epoch': 1} {'type': 'loss', 'content': 0.11792588233947754, 'timestamp': '2025-10-01 04:17:42.547110', 'step': 1499, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:42.601837', 'step': 1499, 'epoch': 1} {'type': 'loss', 'content': 0.19943739473819733, 'timestamp': '2025-10-01 04:17:42.611866', 'step': 1500, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 1500', 'timestamp': '2025-10-01 04:17:42.986534', 'step': 1500, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:43.046407', 'step': 1500, 'epoch': 1} {'type': 'loss', 'content': 0.22021332383155823, 'timestamp': '2025-10-01 04:17:43.048677', 'step': 1501, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:43.103882', 'step': 1501, 'epoch': 1} {'type': 'loss', 'content': 0.3119799494743347, 'timestamp': '2025-10-01 04:17:43.107527', 'step': 1502, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:43.174104', 'step': 1502, 'epoch': 1} {'type': 'loss', 'content': 0.1849905550479889, 'timestamp': '2025-10-01 04:17:43.176195', 'step': 1503, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:43.232800', 'step': 1503, 'epoch': 1} {'type': 'loss', 'content': 0.14159736037254333, 'timestamp': '2025-10-01 04:17:43.239119', 'step': 1504, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:43.293009', 'step': 1504, 'epoch': 1} {'type': 'loss', 'content': 0.1893378347158432, 'timestamp': '2025-10-01 04:17:43.295351', 'step': 1505, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:43.349183', 'step': 1505, 'epoch': 1} {'type': 'loss', 'content': 0.18316033482551575, 'timestamp': '2025-10-01 04:17:43.351348', 'step': 1506, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:43.411356', 'step': 1506, 'epoch': 1} {'type': 'loss', 'content': 0.17326657474040985, 'timestamp': '2025-10-01 04:17:43.413725', 'step': 1507, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:43.469520', 'step': 1507, 'epoch': 1} {'type': 'loss', 'content': 0.17499756813049316, 'timestamp': '2025-10-01 04:17:43.475415', 'step': 1508, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:43.529162', 'step': 1508, 'epoch': 1} {'type': 'loss', 'content': 0.1752183884382248, 'timestamp': '2025-10-01 04:17:43.531218', 'step': 1509, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:43.586396', 'step': 1509, 'epoch': 1} {'type': 'loss', 'content': 0.2057638168334961, 'timestamp': '2025-10-01 04:17:43.588463', 'step': 1510, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:17:43.641946', 'step': 1510, 'epoch': 1} {'type': 'loss', 'content': 0.13507619500160217, 'timestamp': '2025-10-01 04:17:43.643962', 'step': 1511, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:43.696927', 'step': 1511, 'epoch': 1} {'type': 'loss', 'content': 0.16748975217342377, 'timestamp': '2025-10-01 04:17:43.702639', 'step': 1512, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:43.756114', 'step': 1512, 'epoch': 1} {'type': 'loss', 'content': 0.17881205677986145, 'timestamp': '2025-10-01 04:17:43.758020', 'step': 1513, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:43.811317', 'step': 1513, 'epoch': 1} {'type': 'loss', 'content': 0.3739742338657379, 'timestamp': '2025-10-01 04:17:43.813333', 'step': 1514, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:17:43.866795', 'step': 1514, 'epoch': 1} {'type': 'loss', 'content': 0.2457730770111084, 'timestamp': '2025-10-01 04:17:43.868831', 'step': 1515, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:43.921999', 'step': 1515, 'epoch': 1} {'type': 'loss', 'content': 0.19338184595108032, 'timestamp': '2025-10-01 04:17:43.927735', 'step': 1516, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:43.980189', 'step': 1516, 'epoch': 1} {'type': 'loss', 'content': 0.15068645775318146, 'timestamp': '2025-10-01 04:17:43.982591', 'step': 1517, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:44.036249', 'step': 1517, 'epoch': 1} {'type': 'loss', 'content': 0.20321869850158691, 'timestamp': '2025-10-01 04:17:44.038238', 'step': 1518, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:44.093890', 'step': 1518, 'epoch': 1} {'type': 'loss', 'content': 0.33852314949035645, 'timestamp': '2025-10-01 04:17:44.096018', 'step': 1519, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:44.154758', 'step': 1519, 'epoch': 1} {'type': 'loss', 'content': 0.11383765935897827, 'timestamp': '2025-10-01 04:17:44.161525', 'step': 1520, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:44.220555', 'step': 1520, 'epoch': 1} {'type': 'loss', 'content': 0.06970323622226715, 'timestamp': '2025-10-01 04:17:44.222798', 'step': 1521, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:44.280917', 'step': 1521, 'epoch': 1} {'type': 'loss', 'content': 0.200117290019989, 'timestamp': '2025-10-01 04:17:44.285631', 'step': 1522, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:17:44.344740', 'step': 1522, 'epoch': 1} {'type': 'loss', 'content': 0.1932796984910965, 'timestamp': '2025-10-01 04:17:44.346834', 'step': 1523, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:44.405881', 'step': 1523, 'epoch': 1} {'type': 'loss', 'content': 0.1898910105228424, 'timestamp': '2025-10-01 04:17:44.412278', 'step': 1524, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:44.472295', 'step': 1524, 'epoch': 1} {'type': 'loss', 'content': 0.21832402050495148, 'timestamp': '2025-10-01 04:17:44.477234', 'step': 1525, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:44.532962', 'step': 1525, 'epoch': 1} {'type': 'loss', 'content': 0.20617376267910004, 'timestamp': '2025-10-01 04:17:44.536718', 'step': 1526, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:44.593603', 'step': 1526, 'epoch': 1} {'type': 'loss', 'content': 0.251350075006485, 'timestamp': '2025-10-01 04:17:44.595926', 'step': 1527, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:44.653785', 'step': 1527, 'epoch': 1} {'type': 'loss', 'content': 0.12298758327960968, 'timestamp': '2025-10-01 04:17:44.660983', 'step': 1528, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:44.716859', 'step': 1528, 'epoch': 1} {'type': 'loss', 'content': 0.20137910544872284, 'timestamp': '2025-10-01 04:17:44.718960', 'step': 1529, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:44.773157', 'step': 1529, 'epoch': 1} {'type': 'loss', 'content': 0.15533536672592163, 'timestamp': '2025-10-01 04:17:44.775121', 'step': 1530, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:44.828100', 'step': 1530, 'epoch': 1} {'type': 'loss', 'content': 0.13586384057998657, 'timestamp': '2025-10-01 04:17:44.830231', 'step': 1531, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:44.889499', 'step': 1531, 'epoch': 1} {'type': 'loss', 'content': 0.22657057642936707, 'timestamp': '2025-10-01 04:17:44.895594', 'step': 1532, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:44.948795', 'step': 1532, 'epoch': 1} {'type': 'loss', 'content': 0.16016130149364471, 'timestamp': '2025-10-01 04:17:44.950823', 'step': 1533, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:45.004113', 'step': 1533, 'epoch': 1} {'type': 'loss', 'content': 0.17006264626979828, 'timestamp': '2025-10-01 04:17:45.005992', 'step': 1534, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:45.059532', 'step': 1534, 'epoch': 1} {'type': 'loss', 'content': 0.2592714726924896, 'timestamp': '2025-10-01 04:17:45.061375', 'step': 1535, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:45.114160', 'step': 1535, 'epoch': 1} {'type': 'loss', 'content': 0.10779426991939545, 'timestamp': '2025-10-01 04:17:45.120177', 'step': 1536, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:45.172502', 'step': 1536, 'epoch': 1} {'type': 'loss', 'content': 0.13461704552173615, 'timestamp': '2025-10-01 04:17:45.174300', 'step': 1537, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:45.226857', 'step': 1537, 'epoch': 1} {'type': 'loss', 'content': 0.28048861026763916, 'timestamp': '2025-10-01 04:17:45.228896', 'step': 1538, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:45.281789', 'step': 1538, 'epoch': 1} {'type': 'loss', 'content': 0.2181849181652069, 'timestamp': '2025-10-01 04:17:45.283724', 'step': 1539, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:45.343296', 'step': 1539, 'epoch': 1} {'type': 'loss', 'content': 0.19817717373371124, 'timestamp': '2025-10-01 04:17:45.348946', 'step': 1540, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:45.403800', 'step': 1540, 'epoch': 1} {'type': 'loss', 'content': 0.2555352449417114, 'timestamp': '2025-10-01 04:17:45.405768', 'step': 1541, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:45.458595', 'step': 1541, 'epoch': 1} {'type': 'loss', 'content': 0.2254437804222107, 'timestamp': '2025-10-01 04:17:45.460792', 'step': 1542, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:45.514138', 'step': 1542, 'epoch': 1} {'type': 'loss', 'content': 0.11058668047189713, 'timestamp': '2025-10-01 04:17:45.516315', 'step': 1543, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:45.569335', 'step': 1543, 'epoch': 1} {'type': 'loss', 'content': 0.12093518674373627, 'timestamp': '2025-10-01 04:17:45.574945', 'step': 1544, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:45.630093', 'step': 1544, 'epoch': 1} {'type': 'loss', 'content': 0.3217430114746094, 'timestamp': '2025-10-01 04:17:45.632177', 'step': 1545, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:45.685101', 'step': 1545, 'epoch': 1} {'type': 'loss', 'content': 0.14785242080688477, 'timestamp': '2025-10-01 04:17:45.687285', 'step': 1546, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:45.749999', 'step': 1546, 'epoch': 1} {'type': 'loss', 'content': 0.10762139409780502, 'timestamp': '2025-10-01 04:17:45.752342', 'step': 1547, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:45.806278', 'step': 1547, 'epoch': 1} {'type': 'loss', 'content': 0.2189338058233261, 'timestamp': '2025-10-01 04:17:45.811852', 'step': 1548, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:45.863677', 'step': 1548, 'epoch': 1} {'type': 'loss', 'content': 0.1878318339586258, 'timestamp': '2025-10-01 04:17:45.865945', 'step': 1549, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:45.918532', 'step': 1549, 'epoch': 1} {'type': 'loss', 'content': 0.12461438775062561, 'timestamp': '2025-10-01 04:17:45.930527', 'step': 1550, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:45.983293', 'step': 1550, 'epoch': 1} {'type': 'loss', 'content': 0.1457914412021637, 'timestamp': '2025-10-01 04:17:45.996164', 'step': 1551, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:46.049530', 'step': 1551, 'epoch': 1} {'type': 'loss', 'content': 0.11313942074775696, 'timestamp': '2025-10-01 04:17:46.055092', 'step': 1552, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:46.116178', 'step': 1552, 'epoch': 1} {'type': 'loss', 'content': 0.1692180335521698, 'timestamp': '2025-10-01 04:17:46.118693', 'step': 1553, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:46.181327', 'step': 1553, 'epoch': 1} {'type': 'loss', 'content': 0.1709551364183426, 'timestamp': '2025-10-01 04:17:46.184902', 'step': 1554, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:46.238393', 'step': 1554, 'epoch': 1} {'type': 'loss', 'content': 0.28914594650268555, 'timestamp': '2025-10-01 04:17:46.244324', 'step': 1555, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:46.298973', 'step': 1555, 'epoch': 1} {'type': 'loss', 'content': 0.17445829510688782, 'timestamp': '2025-10-01 04:17:46.305464', 'step': 1556, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:46.359960', 'step': 1556, 'epoch': 1} {'type': 'loss', 'content': 0.18717218935489655, 'timestamp': '2025-10-01 04:17:46.362087', 'step': 1557, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:46.414775', 'step': 1557, 'epoch': 1} {'type': 'loss', 'content': 0.14651542901992798, 'timestamp': '2025-10-01 04:17:46.416809', 'step': 1558, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:46.469814', 'step': 1558, 'epoch': 1} {'type': 'loss', 'content': 0.1854291707277298, 'timestamp': '2025-10-01 04:17:46.471944', 'step': 1559, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:46.524608', 'step': 1559, 'epoch': 1} {'type': 'loss', 'content': 0.21640338003635406, 'timestamp': '2025-10-01 04:17:46.535104', 'step': 1560, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:46.587827', 'step': 1560, 'epoch': 1} {'type': 'loss', 'content': 0.22152014076709747, 'timestamp': '2025-10-01 04:17:46.589966', 'step': 1561, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:46.645911', 'step': 1561, 'epoch': 1} {'type': 'loss', 'content': 0.16834387183189392, 'timestamp': '2025-10-01 04:17:46.648316', 'step': 1562, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:46.712242', 'step': 1562, 'epoch': 1} {'type': 'loss', 'content': 0.2164722979068756, 'timestamp': '2025-10-01 04:17:46.714472', 'step': 1563, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:46.767446', 'step': 1563, 'epoch': 1} {'type': 'loss', 'content': 0.17575956881046295, 'timestamp': '2025-10-01 04:17:46.785310', 'step': 1564, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:46.838543', 'step': 1564, 'epoch': 1} {'type': 'loss', 'content': 0.16589951515197754, 'timestamp': '2025-10-01 04:17:46.844958', 'step': 1565, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:46.897271', 'step': 1565, 'epoch': 1} {'type': 'loss', 'content': 0.25681906938552856, 'timestamp': '2025-10-01 04:17:46.899488', 'step': 1566, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:46.952105', 'step': 1566, 'epoch': 1} {'type': 'loss', 'content': 0.10602235794067383, 'timestamp': '2025-10-01 04:17:46.956609', 'step': 1567, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:47.012252', 'step': 1567, 'epoch': 1} {'type': 'loss', 'content': 0.2927165627479553, 'timestamp': '2025-10-01 04:17:47.019570', 'step': 1568, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:47.071664', 'step': 1568, 'epoch': 1} {'type': 'loss', 'content': 0.14740748703479767, 'timestamp': '2025-10-01 04:17:47.073591', 'step': 1569, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:47.126886', 'step': 1569, 'epoch': 1} {'type': 'loss', 'content': 0.18880750238895416, 'timestamp': '2025-10-01 04:17:47.129038', 'step': 1570, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:47.182203', 'step': 1570, 'epoch': 1} {'type': 'loss', 'content': 0.1369556486606598, 'timestamp': '2025-10-01 04:17:47.184104', 'step': 1571, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:47.239985', 'step': 1571, 'epoch': 1} {'type': 'loss', 'content': 0.3123866617679596, 'timestamp': '2025-10-01 04:17:47.245438', 'step': 1572, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:17:47.298780', 'step': 1572, 'epoch': 1} {'type': 'loss', 'content': 0.1517290472984314, 'timestamp': '2025-10-01 04:17:47.300916', 'step': 1573, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:17:47.354918', 'step': 1573, 'epoch': 1} {'type': 'loss', 'content': 0.0976409837603569, 'timestamp': '2025-10-01 04:17:47.358110', 'step': 1574, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:47.412892', 'step': 1574, 'epoch': 1} {'type': 'loss', 'content': 0.18613936007022858, 'timestamp': '2025-10-01 04:17:47.415163', 'step': 1575, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:47.468223', 'step': 1575, 'epoch': 1} {'type': 'loss', 'content': 0.210590198636055, 'timestamp': '2025-10-01 04:17:47.474440', 'step': 1576, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:47.526869', 'step': 1576, 'epoch': 1} {'type': 'loss', 'content': 0.19569529592990875, 'timestamp': '2025-10-01 04:17:47.529057', 'step': 1577, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:47.581788', 'step': 1577, 'epoch': 1} {'type': 'loss', 'content': 0.12750276923179626, 'timestamp': '2025-10-01 04:17:47.583667', 'step': 1578, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:47.636595', 'step': 1578, 'epoch': 1} {'type': 'loss', 'content': 0.1844884306192398, 'timestamp': '2025-10-01 04:17:47.638972', 'step': 1579, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:47.691372', 'step': 1579, 'epoch': 1} {'type': 'loss', 'content': 0.19384373724460602, 'timestamp': '2025-10-01 04:17:47.696838', 'step': 1580, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:17:47.751993', 'step': 1580, 'epoch': 1} {'type': 'loss', 'content': 0.16159121692180634, 'timestamp': '2025-10-01 04:17:47.754088', 'step': 1581, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:47.809526', 'step': 1581, 'epoch': 1} {'type': 'loss', 'content': 0.20950542390346527, 'timestamp': '2025-10-01 04:17:47.812003', 'step': 1582, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:47.864729', 'step': 1582, 'epoch': 1} {'type': 'loss', 'content': 0.2240280956029892, 'timestamp': '2025-10-01 04:17:47.866820', 'step': 1583, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:47.919673', 'step': 1583, 'epoch': 1} {'type': 'loss', 'content': 0.1835126131772995, 'timestamp': '2025-10-01 04:17:47.925847', 'step': 1584, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:47.997119', 'step': 1584, 'epoch': 1} {'type': 'loss', 'content': 0.1835947185754776, 'timestamp': '2025-10-01 04:17:47.999488', 'step': 1585, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:48.052580', 'step': 1585, 'epoch': 1} {'type': 'loss', 'content': 0.1825498789548874, 'timestamp': '2025-10-01 04:17:48.054627', 'step': 1586, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:48.107325', 'step': 1586, 'epoch': 1} {'type': 'loss', 'content': 0.26823845505714417, 'timestamp': '2025-10-01 04:17:48.109262', 'step': 1587, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:48.162814', 'step': 1587, 'epoch': 1} {'type': 'loss', 'content': 0.16920703649520874, 'timestamp': '2025-10-01 04:17:48.168895', 'step': 1588, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:48.228855', 'step': 1588, 'epoch': 1} {'type': 'loss', 'content': 0.14139272272586823, 'timestamp': '2025-10-01 04:17:48.231303', 'step': 1589, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:48.284200', 'step': 1589, 'epoch': 1} {'type': 'loss', 'content': 0.2106039822101593, 'timestamp': '2025-10-01 04:17:48.286394', 'step': 1590, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:48.339261', 'step': 1590, 'epoch': 1} {'type': 'loss', 'content': 0.20239917933940887, 'timestamp': '2025-10-01 04:17:48.341249', 'step': 1591, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:48.395998', 'step': 1591, 'epoch': 1} {'type': 'loss', 'content': 0.24264980852603912, 'timestamp': '2025-10-01 04:17:48.401576', 'step': 1592, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:48.455903', 'step': 1592, 'epoch': 1} {'type': 'loss', 'content': 0.15903662145137787, 'timestamp': '2025-10-01 04:17:48.457956', 'step': 1593, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:48.510593', 'step': 1593, 'epoch': 1} {'type': 'loss', 'content': 0.3075336813926697, 'timestamp': '2025-10-01 04:17:48.513844', 'step': 1594, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:48.567174', 'step': 1594, 'epoch': 1} {'type': 'loss', 'content': 0.1424141824245453, 'timestamp': '2025-10-01 04:17:48.569165', 'step': 1595, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:48.621999', 'step': 1595, 'epoch': 1} {'type': 'loss', 'content': 0.23082002997398376, 'timestamp': '2025-10-01 04:17:48.627942', 'step': 1596, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:48.682301', 'step': 1596, 'epoch': 1} {'type': 'loss', 'content': 0.1996462494134903, 'timestamp': '2025-10-01 04:17:48.684850', 'step': 1597, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:48.738392', 'step': 1597, 'epoch': 1} {'type': 'loss', 'content': 0.16051718592643738, 'timestamp': '2025-10-01 04:17:48.740489', 'step': 1598, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:48.794209', 'step': 1598, 'epoch': 1} {'type': 'loss', 'content': 0.19686901569366455, 'timestamp': '2025-10-01 04:17:48.796146', 'step': 1599, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:48.851296', 'step': 1599, 'epoch': 1} {'type': 'loss', 'content': 0.19108359515666962, 'timestamp': '2025-10-01 04:17:48.856780', 'step': 1600, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:48.909799', 'step': 1600, 'epoch': 1} {'type': 'loss', 'content': 0.15791311860084534, 'timestamp': '2025-10-01 04:17:48.911856', 'step': 1601, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:48.964294', 'step': 1601, 'epoch': 1} {'type': 'loss', 'content': 0.16654786467552185, 'timestamp': '2025-10-01 04:17:48.966292', 'step': 1602, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:17:49.019763', 'step': 1602, 'epoch': 1} {'type': 'loss', 'content': 0.14981883764266968, 'timestamp': '2025-10-01 04:17:49.022052', 'step': 1603, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:49.075334', 'step': 1603, 'epoch': 1} {'type': 'loss', 'content': 0.16137558221817017, 'timestamp': '2025-10-01 04:17:49.081446', 'step': 1604, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:49.138966', 'step': 1604, 'epoch': 1} {'type': 'loss', 'content': 0.14730827510356903, 'timestamp': '2025-10-01 04:17:49.142002', 'step': 1605, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:49.194843', 'step': 1605, 'epoch': 1} {'type': 'loss', 'content': 0.22930309176445007, 'timestamp': '2025-10-01 04:17:49.201406', 'step': 1606, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:49.254843', 'step': 1606, 'epoch': 1} {'type': 'loss', 'content': 0.09207256883382797, 'timestamp': '2025-10-01 04:17:49.256792', 'step': 1607, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:49.320199', 'step': 1607, 'epoch': 1} {'type': 'loss', 'content': 0.17988364398479462, 'timestamp': '2025-10-01 04:17:49.325828', 'step': 1608, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:49.378598', 'step': 1608, 'epoch': 1} {'type': 'loss', 'content': 0.19085463881492615, 'timestamp': '2025-10-01 04:17:49.380822', 'step': 1609, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:49.438367', 'step': 1609, 'epoch': 1} {'type': 'loss', 'content': 0.1945439875125885, 'timestamp': '2025-10-01 04:17:49.445199', 'step': 1610, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:49.506809', 'step': 1610, 'epoch': 1} {'type': 'loss', 'content': 0.1871916800737381, 'timestamp': '2025-10-01 04:17:49.508809', 'step': 1611, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:49.561157', 'step': 1611, 'epoch': 1} {'type': 'loss', 'content': 0.22487260401248932, 'timestamp': '2025-10-01 04:17:49.567440', 'step': 1612, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:49.627593', 'step': 1612, 'epoch': 1} {'type': 'loss', 'content': 0.14925581216812134, 'timestamp': '2025-10-01 04:17:49.631109', 'step': 1613, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:49.683993', 'step': 1613, 'epoch': 1} {'type': 'loss', 'content': 0.16139955818653107, 'timestamp': '2025-10-01 04:17:49.685824', 'step': 1614, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:49.747215', 'step': 1614, 'epoch': 1} {'type': 'loss', 'content': 0.31974244117736816, 'timestamp': '2025-10-01 04:17:49.749348', 'step': 1615, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:49.802271', 'step': 1615, 'epoch': 1} {'type': 'loss', 'content': 0.1535245180130005, 'timestamp': '2025-10-01 04:17:49.807959', 'step': 1616, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:49.860971', 'step': 1616, 'epoch': 1} {'type': 'loss', 'content': 0.21727100014686584, 'timestamp': '2025-10-01 04:17:49.863348', 'step': 1617, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:49.922090', 'step': 1617, 'epoch': 1} {'type': 'loss', 'content': 0.1910306215286255, 'timestamp': '2025-10-01 04:17:49.924226', 'step': 1618, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:49.977682', 'step': 1618, 'epoch': 1} {'type': 'loss', 'content': 0.17892351746559143, 'timestamp': '2025-10-01 04:17:49.979674', 'step': 1619, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:50.034362', 'step': 1619, 'epoch': 1} {'type': 'loss', 'content': 0.14663860201835632, 'timestamp': '2025-10-01 04:17:50.039842', 'step': 1620, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:50.092689', 'step': 1620, 'epoch': 1} {'type': 'loss', 'content': 0.23459281027317047, 'timestamp': '2025-10-01 04:17:50.094947', 'step': 1621, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:50.147164', 'step': 1621, 'epoch': 1} {'type': 'loss', 'content': 0.19413509964942932, 'timestamp': '2025-10-01 04:17:50.149228', 'step': 1622, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:17:50.202816', 'step': 1622, 'epoch': 1} {'type': 'loss', 'content': 0.18430538475513458, 'timestamp': '2025-10-01 04:17:50.205490', 'step': 1623, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:50.258477', 'step': 1623, 'epoch': 1} {'type': 'loss', 'content': 0.3506481945514679, 'timestamp': '2025-10-01 04:17:50.264329', 'step': 1624, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:50.317970', 'step': 1624, 'epoch': 1} {'type': 'loss', 'content': 0.16879236698150635, 'timestamp': '2025-10-01 04:17:50.319899', 'step': 1625, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:50.386711', 'step': 1625, 'epoch': 1} {'type': 'loss', 'content': 0.09750023484230042, 'timestamp': '2025-10-01 04:17:50.389587', 'step': 1626, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:50.443998', 'step': 1626, 'epoch': 1} {'type': 'loss', 'content': 0.13956257700920105, 'timestamp': '2025-10-01 04:17:50.446087', 'step': 1627, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:50.500477', 'step': 1627, 'epoch': 1} {'type': 'loss', 'content': 0.12995105981826782, 'timestamp': '2025-10-01 04:17:50.505912', 'step': 1628, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:50.558432', 'step': 1628, 'epoch': 1} {'type': 'loss', 'content': 0.16365014016628265, 'timestamp': '2025-10-01 04:17:50.560546', 'step': 1629, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:17:50.613463', 'step': 1629, 'epoch': 1} {'type': 'loss', 'content': 0.16270707547664642, 'timestamp': '2025-10-01 04:17:50.615592', 'step': 1630, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:50.668827', 'step': 1630, 'epoch': 1} {'type': 'loss', 'content': 0.1323910653591156, 'timestamp': '2025-10-01 04:17:50.670886', 'step': 1631, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:50.725688', 'step': 1631, 'epoch': 1} {'type': 'loss', 'content': 0.18460780382156372, 'timestamp': '2025-10-01 04:17:50.731334', 'step': 1632, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:50.783673', 'step': 1632, 'epoch': 1} {'type': 'loss', 'content': 0.1734725832939148, 'timestamp': '2025-10-01 04:17:50.786045', 'step': 1633, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:50.839884', 'step': 1633, 'epoch': 1} {'type': 'loss', 'content': 0.15258052945137024, 'timestamp': '2025-10-01 04:17:50.846927', 'step': 1634, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:50.902456', 'step': 1634, 'epoch': 1} {'type': 'loss', 'content': 0.1819542497396469, 'timestamp': '2025-10-01 04:17:50.905329', 'step': 1635, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:50.962855', 'step': 1635, 'epoch': 1} {'type': 'loss', 'content': 0.176248699426651, 'timestamp': '2025-10-01 04:17:50.969187', 'step': 1636, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:51.026612', 'step': 1636, 'epoch': 1} {'type': 'loss', 'content': 0.24978740513324738, 'timestamp': '2025-10-01 04:17:51.034423', 'step': 1637, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:51.090217', 'step': 1637, 'epoch': 1} {'type': 'loss', 'content': 0.29956069588661194, 'timestamp': '2025-10-01 04:17:51.093143', 'step': 1638, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:51.154448', 'step': 1638, 'epoch': 1} {'type': 'loss', 'content': 0.24632933735847473, 'timestamp': '2025-10-01 04:17:51.158045', 'step': 1639, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:51.213260', 'step': 1639, 'epoch': 1} {'type': 'loss', 'content': 0.21624855697155, 'timestamp': '2025-10-01 04:17:51.218852', 'step': 1640, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:51.271355', 'step': 1640, 'epoch': 1} {'type': 'loss', 'content': 0.19469022750854492, 'timestamp': '2025-10-01 04:17:51.275491', 'step': 1641, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:51.330980', 'step': 1641, 'epoch': 1} {'type': 'loss', 'content': 0.1645161509513855, 'timestamp': '2025-10-01 04:17:51.333513', 'step': 1642, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:51.390106', 'step': 1642, 'epoch': 1} {'type': 'loss', 'content': 0.17027045786380768, 'timestamp': '2025-10-01 04:17:51.392023', 'step': 1643, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:51.444798', 'step': 1643, 'epoch': 1} {'type': 'loss', 'content': 0.2634793519973755, 'timestamp': '2025-10-01 04:17:51.450963', 'step': 1644, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:51.510187', 'step': 1644, 'epoch': 1} {'type': 'loss', 'content': 0.11191233992576599, 'timestamp': '2025-10-01 04:17:51.512328', 'step': 1645, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:51.566104', 'step': 1645, 'epoch': 1} {'type': 'loss', 'content': 0.11849595606327057, 'timestamp': '2025-10-01 04:17:51.568431', 'step': 1646, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:51.621713', 'step': 1646, 'epoch': 1} {'type': 'loss', 'content': 0.13769640028476715, 'timestamp': '2025-10-01 04:17:51.623925', 'step': 1647, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:17:51.677111', 'step': 1647, 'epoch': 1} {'type': 'loss', 'content': 0.16728658974170685, 'timestamp': '2025-10-01 04:17:51.682516', 'step': 1648, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:51.735982', 'step': 1648, 'epoch': 1} {'type': 'loss', 'content': 0.16906185448169708, 'timestamp': '2025-10-01 04:17:51.738049', 'step': 1649, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:51.790603', 'step': 1649, 'epoch': 1} {'type': 'loss', 'content': 0.19791744649410248, 'timestamp': '2025-10-01 04:17:51.792793', 'step': 1650, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:51.852299', 'step': 1650, 'epoch': 1} {'type': 'loss', 'content': 0.18699383735656738, 'timestamp': '2025-10-01 04:17:51.859408', 'step': 1651, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:51.912745', 'step': 1651, 'epoch': 1} {'type': 'loss', 'content': 0.19202716648578644, 'timestamp': '2025-10-01 04:17:51.918068', 'step': 1652, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:51.970342', 'step': 1652, 'epoch': 1} {'type': 'loss', 'content': 0.18398252129554749, 'timestamp': '2025-10-01 04:17:51.972223', 'step': 1653, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:52.024578', 'step': 1653, 'epoch': 1} {'type': 'loss', 'content': 0.12907403707504272, 'timestamp': '2025-10-01 04:17:52.026897', 'step': 1654, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:52.080454', 'step': 1654, 'epoch': 1} {'type': 'loss', 'content': 0.22233165800571442, 'timestamp': '2025-10-01 04:17:52.082837', 'step': 1655, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:52.137785', 'step': 1655, 'epoch': 1} {'type': 'loss', 'content': 0.31390443444252014, 'timestamp': '2025-10-01 04:17:52.143368', 'step': 1656, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:52.197377', 'step': 1656, 'epoch': 1} {'type': 'loss', 'content': 0.1684150993824005, 'timestamp': '2025-10-01 04:17:52.199459', 'step': 1657, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:52.252448', 'step': 1657, 'epoch': 1} {'type': 'loss', 'content': 0.12859243154525757, 'timestamp': '2025-10-01 04:17:52.254774', 'step': 1658, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:52.307867', 'step': 1658, 'epoch': 1} {'type': 'loss', 'content': 0.154527947306633, 'timestamp': '2025-10-01 04:17:52.310661', 'step': 1659, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:52.364233', 'step': 1659, 'epoch': 1} {'type': 'loss', 'content': 0.15315252542495728, 'timestamp': '2025-10-01 04:17:52.370768', 'step': 1660, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:17:52.423759', 'step': 1660, 'epoch': 1} {'type': 'loss', 'content': 0.16196732223033905, 'timestamp': '2025-10-01 04:17:52.425850', 'step': 1661, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:52.478713', 'step': 1661, 'epoch': 1} {'type': 'loss', 'content': 0.18895812332630157, 'timestamp': '2025-10-01 04:17:52.481216', 'step': 1662, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:52.534358', 'step': 1662, 'epoch': 1} {'type': 'loss', 'content': 0.14095613360404968, 'timestamp': '2025-10-01 04:17:52.537318', 'step': 1663, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:17:52.591346', 'step': 1663, 'epoch': 1} {'type': 'loss', 'content': 0.12817859649658203, 'timestamp': '2025-10-01 04:17:52.596881', 'step': 1664, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:52.649846', 'step': 1664, 'epoch': 1} {'type': 'loss', 'content': 0.14990033209323883, 'timestamp': '2025-10-01 04:17:52.652126', 'step': 1665, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:52.705416', 'step': 1665, 'epoch': 1} {'type': 'loss', 'content': 0.1490742415189743, 'timestamp': '2025-10-01 04:17:52.707542', 'step': 1666, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:52.760781', 'step': 1666, 'epoch': 1} {'type': 'loss', 'content': 0.19750723242759705, 'timestamp': '2025-10-01 04:17:52.762748', 'step': 1667, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:52.823235', 'step': 1667, 'epoch': 1} {'type': 'loss', 'content': 0.21291790902614594, 'timestamp': '2025-10-01 04:17:52.828869', 'step': 1668, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:52.881695', 'step': 1668, 'epoch': 1} {'type': 'loss', 'content': 0.11425013840198517, 'timestamp': '2025-10-01 04:17:52.883561', 'step': 1669, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:52.936917', 'step': 1669, 'epoch': 1} {'type': 'loss', 'content': 0.2217877358198166, 'timestamp': '2025-10-01 04:17:52.938992', 'step': 1670, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:52.993656', 'step': 1670, 'epoch': 1} {'type': 'loss', 'content': 0.13199597597122192, 'timestamp': '2025-10-01 04:17:52.996010', 'step': 1671, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:53.050613', 'step': 1671, 'epoch': 1} {'type': 'loss', 'content': 0.1997966170310974, 'timestamp': '2025-10-01 04:17:53.066312', 'step': 1672, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:17:53.122334', 'step': 1672, 'epoch': 1} {'type': 'loss', 'content': 0.18375557661056519, 'timestamp': '2025-10-01 04:17:53.124570', 'step': 1673, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:53.189408', 'step': 1673, 'epoch': 1} {'type': 'loss', 'content': 0.15338508784770966, 'timestamp': '2025-10-01 04:17:53.191213', 'step': 1674, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:53.244482', 'step': 1674, 'epoch': 1} {'type': 'loss', 'content': 0.21339793503284454, 'timestamp': '2025-10-01 04:17:53.246868', 'step': 1675, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:53.305159', 'step': 1675, 'epoch': 1} {'type': 'loss', 'content': 0.18896277248859406, 'timestamp': '2025-10-01 04:17:53.311390', 'step': 1676, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:53.366765', 'step': 1676, 'epoch': 1} {'type': 'loss', 'content': 0.1251869946718216, 'timestamp': '2025-10-01 04:17:53.369088', 'step': 1677, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:53.422712', 'step': 1677, 'epoch': 1} {'type': 'loss', 'content': 0.1476164311170578, 'timestamp': '2025-10-01 04:17:53.425003', 'step': 1678, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-01 04:17:53.478998', 'step': 1678, 'epoch': 1} {'type': 'loss', 'content': 0.15177009999752045, 'timestamp': '2025-10-01 04:17:53.481259', 'step': 1679, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:53.535844', 'step': 1679, 'epoch': 1} {'type': 'loss', 'content': 0.19143755733966827, 'timestamp': '2025-10-01 04:17:53.541646', 'step': 1680, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:53.593888', 'step': 1680, 'epoch': 1} {'type': 'loss', 'content': 0.12401383370161057, 'timestamp': '2025-10-01 04:17:53.595694', 'step': 1681, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:53.647816', 'step': 1681, 'epoch': 1} {'type': 'loss', 'content': 0.13492253422737122, 'timestamp': '2025-10-01 04:17:53.649867', 'step': 1682, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:17:53.702979', 'step': 1682, 'epoch': 1} {'type': 'loss', 'content': 0.1465083211660385, 'timestamp': '2025-10-01 04:17:53.704963', 'step': 1683, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:53.758860', 'step': 1683, 'epoch': 1} {'type': 'loss', 'content': 0.18445852398872375, 'timestamp': '2025-10-01 04:17:53.768198', 'step': 1684, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:53.833211', 'step': 1684, 'epoch': 1} {'type': 'loss', 'content': 0.3105196952819824, 'timestamp': '2025-10-01 04:17:53.835191', 'step': 1685, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:53.888202', 'step': 1685, 'epoch': 1} {'type': 'loss', 'content': 0.224972203373909, 'timestamp': '2025-10-01 04:17:53.890103', 'step': 1686, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:53.949006', 'step': 1686, 'epoch': 1} {'type': 'loss', 'content': 0.25657859444618225, 'timestamp': '2025-10-01 04:17:53.951496', 'step': 1687, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:54.004254', 'step': 1687, 'epoch': 1} {'type': 'loss', 'content': 0.11237471550703049, 'timestamp': '2025-10-01 04:17:54.009717', 'step': 1688, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:54.062565', 'step': 1688, 'epoch': 1} {'type': 'loss', 'content': 0.15500865876674652, 'timestamp': '2025-10-01 04:17:54.064821', 'step': 1689, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:54.118001', 'step': 1689, 'epoch': 1} {'type': 'loss', 'content': 0.2759438753128052, 'timestamp': '2025-10-01 04:17:54.120454', 'step': 1690, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:17:54.173762', 'step': 1690, 'epoch': 1} {'type': 'loss', 'content': 0.29695144295692444, 'timestamp': '2025-10-01 04:17:54.175973', 'step': 1691, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:54.228690', 'step': 1691, 'epoch': 1} {'type': 'loss', 'content': 0.14486132562160492, 'timestamp': '2025-10-01 04:17:54.234315', 'step': 1692, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:54.295688', 'step': 1692, 'epoch': 1} {'type': 'loss', 'content': 0.2597441077232361, 'timestamp': '2025-10-01 04:17:54.301068', 'step': 1693, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:54.354397', 'step': 1693, 'epoch': 1} {'type': 'loss', 'content': 0.1517578810453415, 'timestamp': '2025-10-01 04:17:54.356536', 'step': 1694, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:17:54.414035', 'step': 1694, 'epoch': 1} {'type': 'loss', 'content': 0.134734109044075, 'timestamp': '2025-10-01 04:17:54.415923', 'step': 1695, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:54.468465', 'step': 1695, 'epoch': 1} {'type': 'loss', 'content': 0.13810066878795624, 'timestamp': '2025-10-01 04:17:54.475311', 'step': 1696, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:54.527969', 'step': 1696, 'epoch': 1} {'type': 'loss', 'content': 0.22711089253425598, 'timestamp': '2025-10-01 04:17:54.544694', 'step': 1697, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:54.597796', 'step': 1697, 'epoch': 1} {'type': 'loss', 'content': 0.1908760815858841, 'timestamp': '2025-10-01 04:17:54.599606', 'step': 1698, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:54.652825', 'step': 1698, 'epoch': 1} {'type': 'loss', 'content': 0.16033151745796204, 'timestamp': '2025-10-01 04:17:54.654634', 'step': 1699, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:54.707140', 'step': 1699, 'epoch': 1} {'type': 'loss', 'content': 0.19796733558177948, 'timestamp': '2025-10-01 04:17:54.712779', 'step': 1700, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:17:54.765057', 'step': 1700, 'epoch': 1} {'type': 'loss', 'content': 0.10859496146440506, 'timestamp': '2025-10-01 04:17:54.767213', 'step': 1701, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:54.824180', 'step': 1701, 'epoch': 1} {'type': 'loss', 'content': 0.1538461446762085, 'timestamp': '2025-10-01 04:17:54.826129', 'step': 1702, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:54.880979', 'step': 1702, 'epoch': 1} {'type': 'loss', 'content': 0.14532610774040222, 'timestamp': '2025-10-01 04:17:54.882954', 'step': 1703, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:54.936690', 'step': 1703, 'epoch': 1} {'type': 'loss', 'content': 0.2050837129354477, 'timestamp': '2025-10-01 04:17:54.942615', 'step': 1704, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:54.995044', 'step': 1704, 'epoch': 1} {'type': 'loss', 'content': 0.2449461817741394, 'timestamp': '2025-10-01 04:17:54.997082', 'step': 1705, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:55.056168', 'step': 1705, 'epoch': 1} {'type': 'loss', 'content': 0.14718244969844818, 'timestamp': '2025-10-01 04:17:55.058534', 'step': 1706, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:17:55.112186', 'step': 1706, 'epoch': 1} {'type': 'loss', 'content': 0.23598001897335052, 'timestamp': '2025-10-01 04:17:55.114372', 'step': 1707, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:55.170858', 'step': 1707, 'epoch': 1} {'type': 'loss', 'content': 0.1516493707895279, 'timestamp': '2025-10-01 04:17:55.176568', 'step': 1708, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:55.232478', 'step': 1708, 'epoch': 1} {'type': 'loss', 'content': 0.14531570672988892, 'timestamp': '2025-10-01 04:17:55.234556', 'step': 1709, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:55.294720', 'step': 1709, 'epoch': 1} {'type': 'loss', 'content': 0.22224171459674835, 'timestamp': '2025-10-01 04:17:55.297223', 'step': 1710, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:17:55.354713', 'step': 1710, 'epoch': 1} {'type': 'loss', 'content': 0.12110146880149841, 'timestamp': '2025-10-01 04:17:55.356851', 'step': 1711, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:55.410823', 'step': 1711, 'epoch': 1} {'type': 'loss', 'content': 0.19047826528549194, 'timestamp': '2025-10-01 04:17:55.416535', 'step': 1712, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:55.469177', 'step': 1712, 'epoch': 1} {'type': 'loss', 'content': 0.16797982156276703, 'timestamp': '2025-10-01 04:17:55.471127', 'step': 1713, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:55.523993', 'step': 1713, 'epoch': 1} {'type': 'loss', 'content': 0.270331472158432, 'timestamp': '2025-10-01 04:17:55.526081', 'step': 1714, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:55.579232', 'step': 1714, 'epoch': 1} {'type': 'loss', 'content': 0.2827857732772827, 'timestamp': '2025-10-01 04:17:55.581120', 'step': 1715, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:55.634004', 'step': 1715, 'epoch': 1} {'type': 'loss', 'content': 0.16181978583335876, 'timestamp': '2025-10-01 04:17:55.639434', 'step': 1716, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:55.692262', 'step': 1716, 'epoch': 1} {'type': 'loss', 'content': 0.1427406370639801, 'timestamp': '2025-10-01 04:17:55.694153', 'step': 1717, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:55.748659', 'step': 1717, 'epoch': 1} {'type': 'loss', 'content': 0.15818829834461212, 'timestamp': '2025-10-01 04:17:55.750683', 'step': 1718, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:55.803892', 'step': 1718, 'epoch': 1} {'type': 'loss', 'content': 0.3423037528991699, 'timestamp': '2025-10-01 04:17:55.806572', 'step': 1719, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:55.859788', 'step': 1719, 'epoch': 1} {'type': 'loss', 'content': 0.13360321521759033, 'timestamp': '2025-10-01 04:17:55.865567', 'step': 1720, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:55.918568', 'step': 1720, 'epoch': 1} {'type': 'loss', 'content': 0.18367160856723785, 'timestamp': '2025-10-01 04:17:55.920447', 'step': 1721, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:55.973554', 'step': 1721, 'epoch': 1} {'type': 'loss', 'content': 0.1883297711610794, 'timestamp': '2025-10-01 04:17:55.975410', 'step': 1722, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:56.028181', 'step': 1722, 'epoch': 1} {'type': 'loss', 'content': 0.1948404461145401, 'timestamp': '2025-10-01 04:17:56.032517', 'step': 1723, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:56.085264', 'step': 1723, 'epoch': 1} {'type': 'loss', 'content': 0.20120027661323547, 'timestamp': '2025-10-01 04:17:56.091332', 'step': 1724, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:56.146907', 'step': 1724, 'epoch': 1} {'type': 'loss', 'content': 0.10806132853031158, 'timestamp': '2025-10-01 04:17:56.148680', 'step': 1725, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:56.201169', 'step': 1725, 'epoch': 1} {'type': 'loss', 'content': 0.17888347804546356, 'timestamp': '2025-10-01 04:17:56.202932', 'step': 1726, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:56.255170', 'step': 1726, 'epoch': 1} {'type': 'loss', 'content': 0.1909143179655075, 'timestamp': '2025-10-01 04:17:56.257522', 'step': 1727, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:56.309713', 'step': 1727, 'epoch': 1} {'type': 'loss', 'content': 0.10640125721693039, 'timestamp': '2025-10-01 04:17:56.315427', 'step': 1728, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:56.368536', 'step': 1728, 'epoch': 1} {'type': 'loss', 'content': 0.2511058747768402, 'timestamp': '2025-10-01 04:17:56.370341', 'step': 1729, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:56.422472', 'step': 1729, 'epoch': 1} {'type': 'loss', 'content': 0.1906414031982422, 'timestamp': '2025-10-01 04:17:56.424594', 'step': 1730, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:56.478129', 'step': 1730, 'epoch': 1} {'type': 'loss', 'content': 0.09968747943639755, 'timestamp': '2025-10-01 04:17:56.480055', 'step': 1731, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:56.533141', 'step': 1731, 'epoch': 1} {'type': 'loss', 'content': 0.12635114789009094, 'timestamp': '2025-10-01 04:17:56.542458', 'step': 1732, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:17:56.596465', 'step': 1732, 'epoch': 1} {'type': 'loss', 'content': 0.2787393033504486, 'timestamp': '2025-10-01 04:17:56.598700', 'step': 1733, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:56.665248', 'step': 1733, 'epoch': 1} {'type': 'loss', 'content': 0.20010845363140106, 'timestamp': '2025-10-01 04:17:56.667288', 'step': 1734, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:56.722599', 'step': 1734, 'epoch': 1} {'type': 'loss', 'content': 0.19522999227046967, 'timestamp': '2025-10-01 04:17:56.724817', 'step': 1735, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:56.778227', 'step': 1735, 'epoch': 1} {'type': 'loss', 'content': 0.19720318913459778, 'timestamp': '2025-10-01 04:17:56.783788', 'step': 1736, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:56.837531', 'step': 1736, 'epoch': 1} {'type': 'loss', 'content': 0.13762114942073822, 'timestamp': '2025-10-01 04:17:56.839535', 'step': 1737, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:56.892180', 'step': 1737, 'epoch': 1} {'type': 'loss', 'content': 0.22736410796642303, 'timestamp': '2025-10-01 04:17:56.894385', 'step': 1738, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:56.947705', 'step': 1738, 'epoch': 1} {'type': 'loss', 'content': 0.1981578767299652, 'timestamp': '2025-10-01 04:17:56.949599', 'step': 1739, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:57.002787', 'step': 1739, 'epoch': 1} {'type': 'loss', 'content': 0.22766566276550293, 'timestamp': '2025-10-01 04:17:57.008245', 'step': 1740, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:57.060692', 'step': 1740, 'epoch': 1} {'type': 'loss', 'content': 0.17078272998332977, 'timestamp': '2025-10-01 04:17:57.062821', 'step': 1741, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:57.115747', 'step': 1741, 'epoch': 1} {'type': 'loss', 'content': 0.16963674128055573, 'timestamp': '2025-10-01 04:17:57.118330', 'step': 1742, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:57.173649', 'step': 1742, 'epoch': 1} {'type': 'loss', 'content': 0.188471719622612, 'timestamp': '2025-10-01 04:17:57.175831', 'step': 1743, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:57.229488', 'step': 1743, 'epoch': 1} {'type': 'loss', 'content': 0.16008667647838593, 'timestamp': '2025-10-01 04:17:57.234892', 'step': 1744, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:57.286917', 'step': 1744, 'epoch': 1} {'type': 'loss', 'content': 0.17468123137950897, 'timestamp': '2025-10-01 04:17:57.289330', 'step': 1745, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:57.342645', 'step': 1745, 'epoch': 1} {'type': 'loss', 'content': 0.15335609018802643, 'timestamp': '2025-10-01 04:17:57.344574', 'step': 1746, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:57.397859', 'step': 1746, 'epoch': 1} {'type': 'loss', 'content': 0.13450631499290466, 'timestamp': '2025-10-01 04:17:57.404968', 'step': 1747, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:57.457373', 'step': 1747, 'epoch': 1} {'type': 'loss', 'content': 0.20486266911029816, 'timestamp': '2025-10-01 04:17:57.463101', 'step': 1748, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:57.525770', 'step': 1748, 'epoch': 1} {'type': 'loss', 'content': 0.18822747468948364, 'timestamp': '2025-10-01 04:17:57.528142', 'step': 1749, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:57.580765', 'step': 1749, 'epoch': 1} {'type': 'loss', 'content': 0.1509172022342682, 'timestamp': '2025-10-01 04:17:57.582740', 'step': 1750, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:57.644127', 'step': 1750, 'epoch': 1} {'type': 'loss', 'content': 0.149079829454422, 'timestamp': '2025-10-01 04:17:57.646315', 'step': 1751, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:57.700495', 'step': 1751, 'epoch': 1} {'type': 'loss', 'content': 0.2741798758506775, 'timestamp': '2025-10-01 04:17:57.707674', 'step': 1752, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:57.761639', 'step': 1752, 'epoch': 1} {'type': 'loss', 'content': 0.18344023823738098, 'timestamp': '2025-10-01 04:17:57.763530', 'step': 1753, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:57.816887', 'step': 1753, 'epoch': 1} {'type': 'loss', 'content': 0.20157817006111145, 'timestamp': '2025-10-01 04:17:57.818894', 'step': 1754, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:57.874343', 'step': 1754, 'epoch': 1} {'type': 'loss', 'content': 0.1067240983247757, 'timestamp': '2025-10-01 04:17:57.876252', 'step': 1755, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:57.930483', 'step': 1755, 'epoch': 1} {'type': 'loss', 'content': 0.09973441064357758, 'timestamp': '2025-10-01 04:17:57.936740', 'step': 1756, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:57.989932', 'step': 1756, 'epoch': 1} {'type': 'loss', 'content': 0.132681742310524, 'timestamp': '2025-10-01 04:17:57.991881', 'step': 1757, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:58.045173', 'step': 1757, 'epoch': 1} {'type': 'loss', 'content': 0.2351273000240326, 'timestamp': '2025-10-01 04:17:58.047380', 'step': 1758, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:58.104149', 'step': 1758, 'epoch': 1} {'type': 'loss', 'content': 0.14614690840244293, 'timestamp': '2025-10-01 04:17:58.106742', 'step': 1759, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:17:58.160425', 'step': 1759, 'epoch': 1} {'type': 'loss', 'content': 0.12274692952632904, 'timestamp': '2025-10-01 04:17:58.166167', 'step': 1760, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:58.218364', 'step': 1760, 'epoch': 1} {'type': 'loss', 'content': 0.1320093274116516, 'timestamp': '2025-10-01 04:17:58.220934', 'step': 1761, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:58.273882', 'step': 1761, 'epoch': 1} {'type': 'loss', 'content': 0.1790258139371872, 'timestamp': '2025-10-01 04:17:58.275976', 'step': 1762, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:58.329223', 'step': 1762, 'epoch': 1} {'type': 'loss', 'content': 0.17914468050003052, 'timestamp': '2025-10-01 04:17:58.331470', 'step': 1763, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:58.385689', 'step': 1763, 'epoch': 1} {'type': 'loss', 'content': 0.21904806792736053, 'timestamp': '2025-10-01 04:17:58.392303', 'step': 1764, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:58.445741', 'step': 1764, 'epoch': 1} {'type': 'loss', 'content': 0.13145364820957184, 'timestamp': '2025-10-01 04:17:58.465243', 'step': 1765, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:17:58.518297', 'step': 1765, 'epoch': 1} {'type': 'loss', 'content': 0.13263504207134247, 'timestamp': '2025-10-01 04:17:58.520417', 'step': 1766, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:58.575493', 'step': 1766, 'epoch': 1} {'type': 'loss', 'content': 0.12796363234519958, 'timestamp': '2025-10-01 04:17:58.577486', 'step': 1767, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:58.630206', 'step': 1767, 'epoch': 1} {'type': 'loss', 'content': 0.17127454280853271, 'timestamp': '2025-10-01 04:17:58.635825', 'step': 1768, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:58.690217', 'step': 1768, 'epoch': 1} {'type': 'loss', 'content': 0.1307602822780609, 'timestamp': '2025-10-01 04:17:58.692144', 'step': 1769, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:58.747641', 'step': 1769, 'epoch': 1} {'type': 'loss', 'content': 0.2108118236064911, 'timestamp': '2025-10-01 04:17:58.749803', 'step': 1770, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:58.812924', 'step': 1770, 'epoch': 1} {'type': 'loss', 'content': 0.15701204538345337, 'timestamp': '2025-10-01 04:17:58.814858', 'step': 1771, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:58.867964', 'step': 1771, 'epoch': 1} {'type': 'loss', 'content': 0.20760956406593323, 'timestamp': '2025-10-01 04:17:58.873822', 'step': 1772, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:58.928323', 'step': 1772, 'epoch': 1} {'type': 'loss', 'content': 0.23665665090084076, 'timestamp': '2025-10-01 04:17:58.930269', 'step': 1773, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:58.983265', 'step': 1773, 'epoch': 1} {'type': 'loss', 'content': 0.2517392039299011, 'timestamp': '2025-10-01 04:17:58.985199', 'step': 1774, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:59.038065', 'step': 1774, 'epoch': 1} {'type': 'loss', 'content': 0.2237004190683365, 'timestamp': '2025-10-01 04:17:59.040057', 'step': 1775, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:59.092960', 'step': 1775, 'epoch': 1} {'type': 'loss', 'content': 0.1655951738357544, 'timestamp': '2025-10-01 04:17:59.098895', 'step': 1776, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:59.152502', 'step': 1776, 'epoch': 1} {'type': 'loss', 'content': 0.23940840363502502, 'timestamp': '2025-10-01 04:17:59.156221', 'step': 1777, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:59.212085', 'step': 1777, 'epoch': 1} {'type': 'loss', 'content': 0.19874876737594604, 'timestamp': '2025-10-01 04:17:59.225514', 'step': 1778, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:59.286706', 'step': 1778, 'epoch': 1} {'type': 'loss', 'content': 0.20798085629940033, 'timestamp': '2025-10-01 04:17:59.288556', 'step': 1779, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:59.346954', 'step': 1779, 'epoch': 1} {'type': 'loss', 'content': 0.19280824065208435, 'timestamp': '2025-10-01 04:17:59.353856', 'step': 1780, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:59.409863', 'step': 1780, 'epoch': 1} {'type': 'loss', 'content': 0.28926751017570496, 'timestamp': '2025-10-01 04:17:59.411847', 'step': 1781, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:59.466582', 'step': 1781, 'epoch': 1} {'type': 'loss', 'content': 0.1631682962179184, 'timestamp': '2025-10-01 04:17:59.469574', 'step': 1782, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:59.527338', 'step': 1782, 'epoch': 1} {'type': 'loss', 'content': 0.16975079476833344, 'timestamp': '2025-10-01 04:17:59.529383', 'step': 1783, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:17:59.585303', 'step': 1783, 'epoch': 1} {'type': 'loss', 'content': 0.22106122970581055, 'timestamp': '2025-10-01 04:17:59.591762', 'step': 1784, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:17:59.660534', 'step': 1784, 'epoch': 1} {'type': 'loss', 'content': 0.15762394666671753, 'timestamp': '2025-10-01 04:17:59.663047', 'step': 1785, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:59.716929', 'step': 1785, 'epoch': 1} {'type': 'loss', 'content': 0.09244413673877716, 'timestamp': '2025-10-01 04:17:59.719118', 'step': 1786, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:59.773452', 'step': 1786, 'epoch': 1} {'type': 'loss', 'content': 0.16202688217163086, 'timestamp': '2025-10-01 04:17:59.776198', 'step': 1787, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:59.832615', 'step': 1787, 'epoch': 1} {'type': 'loss', 'content': 0.13151922821998596, 'timestamp': '2025-10-01 04:17:59.838699', 'step': 1788, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:17:59.891442', 'step': 1788, 'epoch': 1} {'type': 'loss', 'content': 0.20497320592403412, 'timestamp': '2025-10-01 04:17:59.893569', 'step': 1789, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:17:59.946912', 'step': 1789, 'epoch': 1} {'type': 'loss', 'content': 0.21125838160514832, 'timestamp': '2025-10-01 04:17:59.948946', 'step': 1790, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:00.002135', 'step': 1790, 'epoch': 1} {'type': 'loss', 'content': 0.1604931354522705, 'timestamp': '2025-10-01 04:18:00.004352', 'step': 1791, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:00.061055', 'step': 1791, 'epoch': 1} {'type': 'loss', 'content': 0.20990543067455292, 'timestamp': '2025-10-01 04:18:00.066938', 'step': 1792, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:00.119519', 'step': 1792, 'epoch': 1} {'type': 'loss', 'content': 0.16127219796180725, 'timestamp': '2025-10-01 04:18:00.121498', 'step': 1793, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:00.174291', 'step': 1793, 'epoch': 1} {'type': 'loss', 'content': 0.23187649250030518, 'timestamp': '2025-10-01 04:18:00.176087', 'step': 1794, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:00.228951', 'step': 1794, 'epoch': 1} {'type': 'loss', 'content': 0.1334594488143921, 'timestamp': '2025-10-01 04:18:00.231218', 'step': 1795, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:00.284499', 'step': 1795, 'epoch': 1} {'type': 'loss', 'content': 0.18843521177768707, 'timestamp': '2025-10-01 04:18:00.290761', 'step': 1796, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:00.345831', 'step': 1796, 'epoch': 1} {'type': 'loss', 'content': 0.26580506563186646, 'timestamp': '2025-10-01 04:18:00.347985', 'step': 1797, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:18:00.401571', 'step': 1797, 'epoch': 1} {'type': 'loss', 'content': 0.27898648381233215, 'timestamp': '2025-10-01 04:18:00.403762', 'step': 1798, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:00.457920', 'step': 1798, 'epoch': 1} {'type': 'loss', 'content': 0.17010493576526642, 'timestamp': '2025-10-01 04:18:00.460173', 'step': 1799, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:00.513687', 'step': 1799, 'epoch': 1} {'type': 'loss', 'content': 0.1454000025987625, 'timestamp': '2025-10-01 04:18:00.519483', 'step': 1800, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:00.572411', 'step': 1800, 'epoch': 1} {'type': 'loss', 'content': 0.17329680919647217, 'timestamp': '2025-10-01 04:18:00.575247', 'step': 1801, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:00.628372', 'step': 1801, 'epoch': 1} {'type': 'loss', 'content': 0.09712792187929153, 'timestamp': '2025-10-01 04:18:00.630822', 'step': 1802, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:00.684544', 'step': 1802, 'epoch': 1} {'type': 'loss', 'content': 0.14108800888061523, 'timestamp': '2025-10-01 04:18:00.686788', 'step': 1803, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:00.740540', 'step': 1803, 'epoch': 1} {'type': 'loss', 'content': 0.15275725722312927, 'timestamp': '2025-10-01 04:18:00.748248', 'step': 1804, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:18:00.807492', 'step': 1804, 'epoch': 1} {'type': 'loss', 'content': 0.1871151328086853, 'timestamp': '2025-10-01 04:18:00.809684', 'step': 1805, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:00.863144', 'step': 1805, 'epoch': 1} {'type': 'loss', 'content': 0.20016637444496155, 'timestamp': '2025-10-01 04:18:00.866474', 'step': 1806, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:00.921442', 'step': 1806, 'epoch': 1} {'type': 'loss', 'content': 0.16632577776908875, 'timestamp': '2025-10-01 04:18:00.923458', 'step': 1807, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:18:00.977206', 'step': 1807, 'epoch': 1} {'type': 'loss', 'content': 0.25070950388908386, 'timestamp': '2025-10-01 04:18:00.983140', 'step': 1808, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:01.037155', 'step': 1808, 'epoch': 1} {'type': 'loss', 'content': 0.23899903893470764, 'timestamp': '2025-10-01 04:18:01.039097', 'step': 1809, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:18:01.092278', 'step': 1809, 'epoch': 1} {'type': 'loss', 'content': 0.18752805888652802, 'timestamp': '2025-10-01 04:18:01.094381', 'step': 1810, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:01.147670', 'step': 1810, 'epoch': 1} {'type': 'loss', 'content': 0.1479981243610382, 'timestamp': '2025-10-01 04:18:01.149696', 'step': 1811, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:01.202768', 'step': 1811, 'epoch': 1} {'type': 'loss', 'content': 0.21004332602024078, 'timestamp': '2025-10-01 04:18:01.208610', 'step': 1812, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:01.261939', 'step': 1812, 'epoch': 1} {'type': 'loss', 'content': 0.1612308770418167, 'timestamp': '2025-10-01 04:18:01.264716', 'step': 1813, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:18:01.319483', 'step': 1813, 'epoch': 1} {'type': 'loss', 'content': 0.12903951108455658, 'timestamp': '2025-10-01 04:18:01.323893', 'step': 1814, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:01.385004', 'step': 1814, 'epoch': 1} {'type': 'loss', 'content': 0.14745937287807465, 'timestamp': '2025-10-01 04:18:01.387299', 'step': 1815, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:01.445104', 'step': 1815, 'epoch': 1} {'type': 'loss', 'content': 0.22475683689117432, 'timestamp': '2025-10-01 04:18:01.450913', 'step': 1816, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:01.503537', 'step': 1816, 'epoch': 1} {'type': 'loss', 'content': 0.15840263664722443, 'timestamp': '2025-10-01 04:18:01.505968', 'step': 1817, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:01.561029', 'step': 1817, 'epoch': 1} {'type': 'loss', 'content': 0.09212026000022888, 'timestamp': '2025-10-01 04:18:01.562957', 'step': 1818, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:01.616559', 'step': 1818, 'epoch': 1} {'type': 'loss', 'content': 0.16623620688915253, 'timestamp': '2025-10-01 04:18:01.618999', 'step': 1819, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:01.673424', 'step': 1819, 'epoch': 1} {'type': 'loss', 'content': 0.22620949149131775, 'timestamp': '2025-10-01 04:18:01.679676', 'step': 1820, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:01.733946', 'step': 1820, 'epoch': 1} {'type': 'loss', 'content': 0.15080156922340393, 'timestamp': '2025-10-01 04:18:01.736222', 'step': 1821, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:01.790233', 'step': 1821, 'epoch': 1} {'type': 'loss', 'content': 0.16651055216789246, 'timestamp': '2025-10-01 04:18:01.792286', 'step': 1822, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:01.844700', 'step': 1822, 'epoch': 1} {'type': 'loss', 'content': 0.1262250542640686, 'timestamp': '2025-10-01 04:18:01.846566', 'step': 1823, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:01.904324', 'step': 1823, 'epoch': 1} {'type': 'loss', 'content': 0.1411101222038269, 'timestamp': '2025-10-01 04:18:01.910024', 'step': 1824, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:18:01.962877', 'step': 1824, 'epoch': 1} {'type': 'loss', 'content': 0.21688169240951538, 'timestamp': '2025-10-01 04:18:01.964914', 'step': 1825, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:02.018174', 'step': 1825, 'epoch': 1} {'type': 'loss', 'content': 0.1843024641275406, 'timestamp': '2025-10-01 04:18:02.019984', 'step': 1826, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:02.074084', 'step': 1826, 'epoch': 1} {'type': 'loss', 'content': 0.18347132205963135, 'timestamp': '2025-10-01 04:18:02.076181', 'step': 1827, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:02.129767', 'step': 1827, 'epoch': 1} {'type': 'loss', 'content': 0.10192904621362686, 'timestamp': '2025-10-01 04:18:02.138377', 'step': 1828, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:02.195750', 'step': 1828, 'epoch': 1} {'type': 'loss', 'content': 0.16297249495983124, 'timestamp': '2025-10-01 04:18:02.197670', 'step': 1829, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:02.250276', 'step': 1829, 'epoch': 1} {'type': 'loss', 'content': 0.08064891397953033, 'timestamp': '2025-10-01 04:18:02.252246', 'step': 1830, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:02.305065', 'step': 1830, 'epoch': 1} {'type': 'loss', 'content': 0.16506889462471008, 'timestamp': '2025-10-01 04:18:02.316089', 'step': 1831, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:02.369245', 'step': 1831, 'epoch': 1} {'type': 'loss', 'content': 0.182954341173172, 'timestamp': '2025-10-01 04:18:02.374880', 'step': 1832, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:18:02.427078', 'step': 1832, 'epoch': 1} {'type': 'loss', 'content': 0.23033633828163147, 'timestamp': '2025-10-01 04:18:02.429117', 'step': 1833, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:02.482120', 'step': 1833, 'epoch': 1} {'type': 'loss', 'content': 0.21953383088111877, 'timestamp': '2025-10-01 04:18:02.484370', 'step': 1834, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:02.538089', 'step': 1834, 'epoch': 1} {'type': 'loss', 'content': 0.14293847978115082, 'timestamp': '2025-10-01 04:18:02.540321', 'step': 1835, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:02.594179', 'step': 1835, 'epoch': 1} {'type': 'loss', 'content': 0.20297813415527344, 'timestamp': '2025-10-01 04:18:02.599807', 'step': 1836, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:02.652539', 'step': 1836, 'epoch': 1} {'type': 'loss', 'content': 0.1137164980173111, 'timestamp': '2025-10-01 04:18:02.654560', 'step': 1837, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:18:02.707133', 'step': 1837, 'epoch': 1} {'type': 'loss', 'content': 0.2240017056465149, 'timestamp': '2025-10-01 04:18:02.709057', 'step': 1838, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:02.761354', 'step': 1838, 'epoch': 1} {'type': 'loss', 'content': 0.16141073405742645, 'timestamp': '2025-10-01 04:18:02.763333', 'step': 1839, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:02.815901', 'step': 1839, 'epoch': 1} {'type': 'loss', 'content': 0.22770695388317108, 'timestamp': '2025-10-01 04:18:02.823032', 'step': 1840, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:02.876017', 'step': 1840, 'epoch': 1} {'type': 'loss', 'content': 0.1397569179534912, 'timestamp': '2025-10-01 04:18:02.878165', 'step': 1841, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:02.930737', 'step': 1841, 'epoch': 1} {'type': 'loss', 'content': 0.16485892236232758, 'timestamp': '2025-10-01 04:18:02.935555', 'step': 1842, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:02.989877', 'step': 1842, 'epoch': 1} {'type': 'loss', 'content': 0.16474853456020355, 'timestamp': '2025-10-01 04:18:02.992570', 'step': 1843, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:03.045290', 'step': 1843, 'epoch': 1} {'type': 'loss', 'content': 0.20043863356113434, 'timestamp': '2025-10-01 04:18:03.051052', 'step': 1844, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:03.102884', 'step': 1844, 'epoch': 1} {'type': 'loss', 'content': 0.16020020842552185, 'timestamp': '2025-10-01 04:18:03.106134', 'step': 1845, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:03.161256', 'step': 1845, 'epoch': 1} {'type': 'loss', 'content': 0.2084396779537201, 'timestamp': '2025-10-01 04:18:03.163402', 'step': 1846, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:18:03.216702', 'step': 1846, 'epoch': 1} {'type': 'loss', 'content': 0.1496659219264984, 'timestamp': '2025-10-01 04:18:03.218627', 'step': 1847, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:03.270870', 'step': 1847, 'epoch': 1} {'type': 'loss', 'content': 0.18979521095752716, 'timestamp': '2025-10-01 04:18:03.276622', 'step': 1848, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:03.334163', 'step': 1848, 'epoch': 1} {'type': 'loss', 'content': 0.20398658514022827, 'timestamp': '2025-10-01 04:18:03.336451', 'step': 1849, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:03.389302', 'step': 1849, 'epoch': 1} {'type': 'loss', 'content': 0.16268610954284668, 'timestamp': '2025-10-01 04:18:03.391839', 'step': 1850, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:03.452211', 'step': 1850, 'epoch': 1} {'type': 'loss', 'content': 0.17720000445842743, 'timestamp': '2025-10-01 04:18:03.454453', 'step': 1851, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:03.507191', 'step': 1851, 'epoch': 1} {'type': 'loss', 'content': 0.1919368952512741, 'timestamp': '2025-10-01 04:18:03.512989', 'step': 1852, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:03.564454', 'step': 1852, 'epoch': 1} {'type': 'loss', 'content': 0.2476106435060501, 'timestamp': '2025-10-01 04:18:03.566955', 'step': 1853, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:03.620115', 'step': 1853, 'epoch': 1} {'type': 'loss', 'content': 0.13682693243026733, 'timestamp': '2025-10-01 04:18:03.621811', 'step': 1854, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:03.673923', 'step': 1854, 'epoch': 1} {'type': 'loss', 'content': 0.16284123063087463, 'timestamp': '2025-10-01 04:18:03.676015', 'step': 1855, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:03.729058', 'step': 1855, 'epoch': 1} {'type': 'loss', 'content': 0.1417369246482849, 'timestamp': '2025-10-01 04:18:03.734438', 'step': 1856, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:03.787698', 'step': 1856, 'epoch': 1} {'type': 'loss', 'content': 0.14881639182567596, 'timestamp': '2025-10-01 04:18:03.790331', 'step': 1857, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:03.843287', 'step': 1857, 'epoch': 1} {'type': 'loss', 'content': 0.08766172081232071, 'timestamp': '2025-10-01 04:18:03.845388', 'step': 1858, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:03.898977', 'step': 1858, 'epoch': 1} {'type': 'loss', 'content': 0.10178827494382858, 'timestamp': '2025-10-01 04:18:03.901121', 'step': 1859, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:03.954097', 'step': 1859, 'epoch': 1} {'type': 'loss', 'content': 0.13890880346298218, 'timestamp': '2025-10-01 04:18:03.959676', 'step': 1860, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:04.013880', 'step': 1860, 'epoch': 1} {'type': 'loss', 'content': 0.14899711310863495, 'timestamp': '2025-10-01 04:18:04.015904', 'step': 1861, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:04.068764', 'step': 1861, 'epoch': 1} {'type': 'loss', 'content': 0.18281851708889008, 'timestamp': '2025-10-01 04:18:04.070786', 'step': 1862, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:18:04.123312', 'step': 1862, 'epoch': 1} {'type': 'loss', 'content': 0.19359566271305084, 'timestamp': '2025-10-01 04:18:04.125730', 'step': 1863, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:04.179057', 'step': 1863, 'epoch': 1} {'type': 'loss', 'content': 0.319077730178833, 'timestamp': '2025-10-01 04:18:04.184953', 'step': 1864, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-01 04:18:18.577910', 'step': 1864, 'epoch': 1} {'type': 'pplx', 'content': 10744.742744130519, 'timestamp': '2025-10-01 04:18:18.583925', 'step': 1864, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:18.639362', 'step': 1864, 'epoch': 1} {'type': 'loss', 'content': 0.1835155040025711, 'timestamp': '2025-10-01 04:18:18.641680', 'step': 1865, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:18.695744', 'step': 1865, 'epoch': 1} {'type': 'loss', 'content': 0.26140812039375305, 'timestamp': '2025-10-01 04:18:18.697966', 'step': 1866, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:18.752524', 'step': 1866, 'epoch': 1} {'type': 'loss', 'content': 0.09162113070487976, 'timestamp': '2025-10-01 04:18:18.755069', 'step': 1867, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:18.808883', 'step': 1867, 'epoch': 1} {'type': 'loss', 'content': 0.2648991048336029, 'timestamp': '2025-10-01 04:18:18.815046', 'step': 1868, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:18.868164', 'step': 1868, 'epoch': 1} {'type': 'loss', 'content': 0.12558139860630035, 'timestamp': '2025-10-01 04:18:18.870398', 'step': 1869, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:18.927361', 'step': 1869, 'epoch': 1} {'type': 'loss', 'content': 0.18091166019439697, 'timestamp': '2025-10-01 04:18:18.929590', 'step': 1870, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:18.993663', 'step': 1870, 'epoch': 1} {'type': 'loss', 'content': 0.21294383704662323, 'timestamp': '2025-10-01 04:18:18.995703', 'step': 1871, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:19.060113', 'step': 1871, 'epoch': 1} {'type': 'loss', 'content': 0.17090724408626556, 'timestamp': '2025-10-01 04:18:19.066175', 'step': 1872, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:19.119540', 'step': 1872, 'epoch': 1} {'type': 'loss', 'content': 0.2453894466161728, 'timestamp': '2025-10-01 04:18:19.121878', 'step': 1873, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:19.176055', 'step': 1873, 'epoch': 1} {'type': 'loss', 'content': 0.13620546460151672, 'timestamp': '2025-10-01 04:18:19.178822', 'step': 1874, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:19.232844', 'step': 1874, 'epoch': 1} {'type': 'loss', 'content': 0.19597361981868744, 'timestamp': '2025-10-01 04:18:19.234894', 'step': 1875, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:19.288878', 'step': 1875, 'epoch': 1} {'type': 'loss', 'content': 0.13926634192466736, 'timestamp': '2025-10-01 04:18:19.295080', 'step': 1876, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:18:19.348696', 'step': 1876, 'epoch': 1} {'type': 'loss', 'content': 0.19801877439022064, 'timestamp': '2025-10-01 04:18:19.350668', 'step': 1877, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:19.407157', 'step': 1877, 'epoch': 1} {'type': 'loss', 'content': 0.13517345488071442, 'timestamp': '2025-10-01 04:18:19.409301', 'step': 1878, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:19.466088', 'step': 1878, 'epoch': 1} {'type': 'loss', 'content': 0.12284781783819199, 'timestamp': '2025-10-01 04:18:19.469206', 'step': 1879, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:19.530864', 'step': 1879, 'epoch': 1} {'type': 'loss', 'content': 0.2118532955646515, 'timestamp': '2025-10-01 04:18:19.536729', 'step': 1880, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:19.590388', 'step': 1880, 'epoch': 1} {'type': 'loss', 'content': 0.22484190762043, 'timestamp': '2025-10-01 04:18:19.592777', 'step': 1881, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:19.647092', 'step': 1881, 'epoch': 1} {'type': 'loss', 'content': 0.10324478149414062, 'timestamp': '2025-10-01 04:18:19.651204', 'step': 1882, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:19.706536', 'step': 1882, 'epoch': 1} {'type': 'loss', 'content': 0.18837802112102509, 'timestamp': '2025-10-01 04:18:19.708899', 'step': 1883, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:19.762861', 'step': 1883, 'epoch': 1} {'type': 'loss', 'content': 0.1652647703886032, 'timestamp': '2025-10-01 04:18:19.768747', 'step': 1884, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:19.821752', 'step': 1884, 'epoch': 1} {'type': 'loss', 'content': 0.1467421054840088, 'timestamp': '2025-10-01 04:18:19.824107', 'step': 1885, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:19.877913', 'step': 1885, 'epoch': 1} {'type': 'loss', 'content': 0.16594131290912628, 'timestamp': '2025-10-01 04:18:19.880132', 'step': 1886, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:18:19.939205', 'step': 1886, 'epoch': 1} {'type': 'loss', 'content': 0.14393627643585205, 'timestamp': '2025-10-01 04:18:19.941102', 'step': 1887, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:18:19.994014', 'step': 1887, 'epoch': 1} {'type': 'loss', 'content': 0.22262689471244812, 'timestamp': '2025-10-01 04:18:19.999400', 'step': 1888, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:20.051973', 'step': 1888, 'epoch': 1} {'type': 'loss', 'content': 0.13538922369480133, 'timestamp': '2025-10-01 04:18:20.057270', 'step': 1889, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:20.110687', 'step': 1889, 'epoch': 1} {'type': 'loss', 'content': 0.24269859492778778, 'timestamp': '2025-10-01 04:18:20.112594', 'step': 1890, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:18:20.165556', 'step': 1890, 'epoch': 1} {'type': 'loss', 'content': 0.18038968741893768, 'timestamp': '2025-10-01 04:18:20.169437', 'step': 1891, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:20.223831', 'step': 1891, 'epoch': 1} {'type': 'loss', 'content': 0.21667349338531494, 'timestamp': '2025-10-01 04:18:20.229318', 'step': 1892, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:20.281782', 'step': 1892, 'epoch': 1} {'type': 'loss', 'content': 0.13315671682357788, 'timestamp': '2025-10-01 04:18:20.283944', 'step': 1893, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:20.336817', 'step': 1893, 'epoch': 1} {'type': 'loss', 'content': 0.2020774483680725, 'timestamp': '2025-10-01 04:18:20.338608', 'step': 1894, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:18:20.391085', 'step': 1894, 'epoch': 1} {'type': 'loss', 'content': 0.167000412940979, 'timestamp': '2025-10-01 04:18:20.393363', 'step': 1895, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:20.452175', 'step': 1895, 'epoch': 1} {'type': 'loss', 'content': 0.2224980741739273, 'timestamp': '2025-10-01 04:18:20.457942', 'step': 1896, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:20.510704', 'step': 1896, 'epoch': 1} {'type': 'loss', 'content': 0.15411236882209778, 'timestamp': '2025-10-01 04:18:20.513013', 'step': 1897, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:20.565161', 'step': 1897, 'epoch': 1} {'type': 'loss', 'content': 0.12605585157871246, 'timestamp': '2025-10-01 04:18:20.567355', 'step': 1898, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:20.620441', 'step': 1898, 'epoch': 1} {'type': 'loss', 'content': 0.12690970301628113, 'timestamp': '2025-10-01 04:18:20.622707', 'step': 1899, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:20.675425', 'step': 1899, 'epoch': 1} {'type': 'loss', 'content': 0.19363462924957275, 'timestamp': '2025-10-01 04:18:20.681118', 'step': 1900, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:20.733444', 'step': 1900, 'epoch': 1} {'type': 'loss', 'content': 0.2140757143497467, 'timestamp': '2025-10-01 04:18:20.735900', 'step': 1901, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:20.793742', 'step': 1901, 'epoch': 1} {'type': 'loss', 'content': 0.30822622776031494, 'timestamp': '2025-10-01 04:18:20.797186', 'step': 1902, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:20.850023', 'step': 1902, 'epoch': 1} {'type': 'loss', 'content': 0.2500338852405548, 'timestamp': '2025-10-01 04:18:20.852637', 'step': 1903, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:20.907594', 'step': 1903, 'epoch': 1} {'type': 'loss', 'content': 0.2796938121318817, 'timestamp': '2025-10-01 04:18:20.913696', 'step': 1904, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:20.969047', 'step': 1904, 'epoch': 1} {'type': 'loss', 'content': 0.1845168173313141, 'timestamp': '2025-10-01 04:18:20.971150', 'step': 1905, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:21.024248', 'step': 1905, 'epoch': 1} {'type': 'loss', 'content': 0.21813715994358063, 'timestamp': '2025-10-01 04:18:21.026190', 'step': 1906, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:21.079519', 'step': 1906, 'epoch': 1} {'type': 'loss', 'content': 0.18037627637386322, 'timestamp': '2025-10-01 04:18:21.081591', 'step': 1907, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:21.134847', 'step': 1907, 'epoch': 1} {'type': 'loss', 'content': 0.23037154972553253, 'timestamp': '2025-10-01 04:18:21.140517', 'step': 1908, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:21.194788', 'step': 1908, 'epoch': 1} {'type': 'loss', 'content': 0.2052881270647049, 'timestamp': '2025-10-01 04:18:21.197477', 'step': 1909, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:18:21.251508', 'step': 1909, 'epoch': 1} {'type': 'loss', 'content': 0.2072608768939972, 'timestamp': '2025-10-01 04:18:21.254103', 'step': 1910, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:21.313542', 'step': 1910, 'epoch': 1} {'type': 'loss', 'content': 0.1709171086549759, 'timestamp': '2025-10-01 04:18:21.315640', 'step': 1911, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:21.368993', 'step': 1911, 'epoch': 1} {'type': 'loss', 'content': 0.2493799328804016, 'timestamp': '2025-10-01 04:18:21.374513', 'step': 1912, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:21.427187', 'step': 1912, 'epoch': 1} {'type': 'loss', 'content': 0.15318450331687927, 'timestamp': '2025-10-01 04:18:21.435479', 'step': 1913, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:21.489802', 'step': 1913, 'epoch': 1} {'type': 'loss', 'content': 0.21264846622943878, 'timestamp': '2025-10-01 04:18:21.491742', 'step': 1914, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:18:21.544636', 'step': 1914, 'epoch': 1} {'type': 'loss', 'content': 0.26029109954833984, 'timestamp': '2025-10-01 04:18:21.548868', 'step': 1915, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:18:21.602296', 'step': 1915, 'epoch': 1} {'type': 'loss', 'content': 0.15688596665859222, 'timestamp': '2025-10-01 04:18:21.608213', 'step': 1916, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:21.663764', 'step': 1916, 'epoch': 1} {'type': 'loss', 'content': 0.1523807793855667, 'timestamp': '2025-10-01 04:18:21.666040', 'step': 1917, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:18:21.719407', 'step': 1917, 'epoch': 1} {'type': 'loss', 'content': 0.18954679369926453, 'timestamp': '2025-10-01 04:18:21.721907', 'step': 1918, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:21.777455', 'step': 1918, 'epoch': 1} {'type': 'loss', 'content': 0.1500437706708908, 'timestamp': '2025-10-01 04:18:21.779476', 'step': 1919, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:21.833566', 'step': 1919, 'epoch': 1} {'type': 'loss', 'content': 0.19883328676223755, 'timestamp': '2025-10-01 04:18:21.839664', 'step': 1920, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:21.893749', 'step': 1920, 'epoch': 1} {'type': 'loss', 'content': 0.24913370609283447, 'timestamp': '2025-10-01 04:18:21.896015', 'step': 1921, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:21.949821', 'step': 1921, 'epoch': 1} {'type': 'loss', 'content': 0.14371106028556824, 'timestamp': '2025-10-01 04:18:21.952117', 'step': 1922, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:22.008641', 'step': 1922, 'epoch': 1} {'type': 'loss', 'content': 0.11291147023439407, 'timestamp': '2025-10-01 04:18:22.010803', 'step': 1923, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:22.063973', 'step': 1923, 'epoch': 1} {'type': 'loss', 'content': 0.22762782871723175, 'timestamp': '2025-10-01 04:18:22.069972', 'step': 1924, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:22.125019', 'step': 1924, 'epoch': 1} {'type': 'loss', 'content': 0.22087933123111725, 'timestamp': '2025-10-01 04:18:22.127066', 'step': 1925, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:18:22.181061', 'step': 1925, 'epoch': 1} {'type': 'loss', 'content': 0.23776036500930786, 'timestamp': '2025-10-01 04:18:22.183767', 'step': 1926, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:18:22.237385', 'step': 1926, 'epoch': 1} {'type': 'loss', 'content': 0.12822815775871277, 'timestamp': '2025-10-01 04:18:22.239495', 'step': 1927, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:22.292553', 'step': 1927, 'epoch': 1} {'type': 'loss', 'content': 0.24517659842967987, 'timestamp': '2025-10-01 04:18:22.298170', 'step': 1928, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:22.351470', 'step': 1928, 'epoch': 1} {'type': 'loss', 'content': 0.21969255805015564, 'timestamp': '2025-10-01 04:18:22.354125', 'step': 1929, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:22.407371', 'step': 1929, 'epoch': 1} {'type': 'loss', 'content': 0.18890434503555298, 'timestamp': '2025-10-01 04:18:22.409875', 'step': 1930, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:18:22.464181', 'step': 1930, 'epoch': 1} {'type': 'loss', 'content': 0.2023044377565384, 'timestamp': '2025-10-01 04:18:22.466524', 'step': 1931, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:22.521327', 'step': 1931, 'epoch': 1} {'type': 'loss', 'content': 0.12821240723133087, 'timestamp': '2025-10-01 04:18:22.534138', 'step': 1932, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:22.590366', 'step': 1932, 'epoch': 1} {'type': 'loss', 'content': 0.22013694047927856, 'timestamp': '2025-10-01 04:18:22.592421', 'step': 1933, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:22.653026', 'step': 1933, 'epoch': 1} {'type': 'loss', 'content': 0.18110504746437073, 'timestamp': '2025-10-01 04:18:22.655035', 'step': 1934, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:22.726875', 'step': 1934, 'epoch': 1} {'type': 'loss', 'content': 0.12196452915668488, 'timestamp': '2025-10-01 04:18:22.729078', 'step': 1935, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:22.782233', 'step': 1935, 'epoch': 1} {'type': 'loss', 'content': 0.2529241740703583, 'timestamp': '2025-10-01 04:18:22.788122', 'step': 1936, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:18:22.842525', 'step': 1936, 'epoch': 1} {'type': 'loss', 'content': 0.15585650503635406, 'timestamp': '2025-10-01 04:18:22.851459', 'step': 1937, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:22.908104', 'step': 1937, 'epoch': 1} {'type': 'loss', 'content': 0.2636117935180664, 'timestamp': '2025-10-01 04:18:22.910215', 'step': 1938, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:22.964387', 'step': 1938, 'epoch': 1} {'type': 'loss', 'content': 0.1882317215204239, 'timestamp': '2025-10-01 04:18:22.972748', 'step': 1939, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:23.026461', 'step': 1939, 'epoch': 1} {'type': 'loss', 'content': 0.16784769296646118, 'timestamp': '2025-10-01 04:18:23.033661', 'step': 1940, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:23.091189', 'step': 1940, 'epoch': 1} {'type': 'loss', 'content': 0.27181094884872437, 'timestamp': '2025-10-01 04:18:23.096216', 'step': 1941, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:23.156301', 'step': 1941, 'epoch': 1} {'type': 'loss', 'content': 0.19971781969070435, 'timestamp': '2025-10-01 04:18:23.159517', 'step': 1942, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:23.219742', 'step': 1942, 'epoch': 1} {'type': 'loss', 'content': 0.16080693900585175, 'timestamp': '2025-10-01 04:18:23.221663', 'step': 1943, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:23.275121', 'step': 1943, 'epoch': 1} {'type': 'loss', 'content': 0.22979024052619934, 'timestamp': '2025-10-01 04:18:23.281196', 'step': 1944, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:23.334955', 'step': 1944, 'epoch': 1} {'type': 'loss', 'content': 0.27892133593559265, 'timestamp': '2025-10-01 04:18:23.337220', 'step': 1945, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:23.393065', 'step': 1945, 'epoch': 1} {'type': 'loss', 'content': 0.14950880408287048, 'timestamp': '2025-10-01 04:18:23.395943', 'step': 1946, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:23.451646', 'step': 1946, 'epoch': 1} {'type': 'loss', 'content': 0.18862855434417725, 'timestamp': '2025-10-01 04:18:23.454837', 'step': 1947, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:23.509048', 'step': 1947, 'epoch': 1} {'type': 'loss', 'content': 0.17788836359977722, 'timestamp': '2025-10-01 04:18:23.514716', 'step': 1948, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:23.568219', 'step': 1948, 'epoch': 1} {'type': 'loss', 'content': 0.30841147899627686, 'timestamp': '2025-10-01 04:18:23.570255', 'step': 1949, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:23.626333', 'step': 1949, 'epoch': 1} {'type': 'loss', 'content': 0.18031926453113556, 'timestamp': '2025-10-01 04:18:23.629390', 'step': 1950, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:23.683974', 'step': 1950, 'epoch': 1} {'type': 'loss', 'content': 0.2884727716445923, 'timestamp': '2025-10-01 04:18:23.686096', 'step': 1951, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:23.739141', 'step': 1951, 'epoch': 1} {'type': 'loss', 'content': 0.1283935159444809, 'timestamp': '2025-10-01 04:18:23.744627', 'step': 1952, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:18:23.797614', 'step': 1952, 'epoch': 1} {'type': 'loss', 'content': 0.16025999188423157, 'timestamp': '2025-10-01 04:18:23.799779', 'step': 1953, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:23.857003', 'step': 1953, 'epoch': 1} {'type': 'loss', 'content': 0.10820596665143967, 'timestamp': '2025-10-01 04:18:23.859196', 'step': 1954, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:23.917081', 'step': 1954, 'epoch': 1} {'type': 'loss', 'content': 0.14528891444206238, 'timestamp': '2025-10-01 04:18:23.919819', 'step': 1955, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:23.979396', 'step': 1955, 'epoch': 1} {'type': 'loss', 'content': 0.15749090909957886, 'timestamp': '2025-10-01 04:18:23.985320', 'step': 1956, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:24.038254', 'step': 1956, 'epoch': 1} {'type': 'loss', 'content': 0.20756587386131287, 'timestamp': '2025-10-01 04:18:24.040183', 'step': 1957, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:24.097648', 'step': 1957, 'epoch': 1} {'type': 'loss', 'content': 0.20215125381946564, 'timestamp': '2025-10-01 04:18:24.099869', 'step': 1958, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:24.154399', 'step': 1958, 'epoch': 1} {'type': 'loss', 'content': 0.1863068789243698, 'timestamp': '2025-10-01 04:18:24.158843', 'step': 1959, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:24.212259', 'step': 1959, 'epoch': 1} {'type': 'loss', 'content': 0.1925256848335266, 'timestamp': '2025-10-01 04:18:24.218222', 'step': 1960, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:24.276040', 'step': 1960, 'epoch': 1} {'type': 'loss', 'content': 0.14095567166805267, 'timestamp': '2025-10-01 04:18:24.278771', 'step': 1961, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:18:24.332172', 'step': 1961, 'epoch': 1} {'type': 'loss', 'content': 0.15961302816867828, 'timestamp': '2025-10-01 04:18:24.334832', 'step': 1962, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:24.394811', 'step': 1962, 'epoch': 1} {'type': 'loss', 'content': 0.1423218995332718, 'timestamp': '2025-10-01 04:18:24.397110', 'step': 1963, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:24.454798', 'step': 1963, 'epoch': 1} {'type': 'loss', 'content': 0.13001562654972076, 'timestamp': '2025-10-01 04:18:24.460641', 'step': 1964, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:24.513777', 'step': 1964, 'epoch': 1} {'type': 'loss', 'content': 0.1538296937942505, 'timestamp': '2025-10-01 04:18:24.515970', 'step': 1965, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:24.569950', 'step': 1965, 'epoch': 1} {'type': 'loss', 'content': 0.12210993468761444, 'timestamp': '2025-10-01 04:18:24.571919', 'step': 1966, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:24.625434', 'step': 1966, 'epoch': 1} {'type': 'loss', 'content': 0.3679455518722534, 'timestamp': '2025-10-01 04:18:24.627562', 'step': 1967, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:24.680772', 'step': 1967, 'epoch': 1} {'type': 'loss', 'content': 0.1488124281167984, 'timestamp': '2025-10-01 04:18:24.690374', 'step': 1968, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:18:24.748549', 'step': 1968, 'epoch': 1} {'type': 'loss', 'content': 0.1766573041677475, 'timestamp': '2025-10-01 04:18:24.750931', 'step': 1969, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:24.804665', 'step': 1969, 'epoch': 1} {'type': 'loss', 'content': 0.1480499505996704, 'timestamp': '2025-10-01 04:18:24.806964', 'step': 1970, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:24.861050', 'step': 1970, 'epoch': 1} {'type': 'loss', 'content': 0.24720823764801025, 'timestamp': '2025-10-01 04:18:24.863202', 'step': 1971, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:24.918027', 'step': 1971, 'epoch': 1} {'type': 'loss', 'content': 0.11091146618127823, 'timestamp': '2025-10-01 04:18:24.924412', 'step': 1972, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:24.979026', 'step': 1972, 'epoch': 1} {'type': 'loss', 'content': 0.18006117641925812, 'timestamp': '2025-10-01 04:18:24.981096', 'step': 1973, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:25.039601', 'step': 1973, 'epoch': 1} {'type': 'loss', 'content': 0.21206074953079224, 'timestamp': '2025-10-01 04:18:25.043259', 'step': 1974, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:25.097027', 'step': 1974, 'epoch': 1} {'type': 'loss', 'content': 0.1528272032737732, 'timestamp': '2025-10-01 04:18:25.099279', 'step': 1975, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:18:25.151967', 'step': 1975, 'epoch': 1} {'type': 'loss', 'content': 0.14795264601707458, 'timestamp': '2025-10-01 04:18:25.158060', 'step': 1976, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:25.211360', 'step': 1976, 'epoch': 1} {'type': 'loss', 'content': 0.17705321311950684, 'timestamp': '2025-10-01 04:18:25.213678', 'step': 1977, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:25.274614', 'step': 1977, 'epoch': 1} {'type': 'loss', 'content': 0.17646564543247223, 'timestamp': '2025-10-01 04:18:25.280878', 'step': 1978, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:25.335563', 'step': 1978, 'epoch': 1} {'type': 'loss', 'content': 0.2541149854660034, 'timestamp': '2025-10-01 04:18:25.337825', 'step': 1979, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:25.393060', 'step': 1979, 'epoch': 1} {'type': 'loss', 'content': 0.15125243365764618, 'timestamp': '2025-10-01 04:18:25.399603', 'step': 1980, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:25.452419', 'step': 1980, 'epoch': 1} {'type': 'loss', 'content': 0.1886420100927353, 'timestamp': '2025-10-01 04:18:25.454422', 'step': 1981, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:18:25.512623', 'step': 1981, 'epoch': 1} {'type': 'loss', 'content': 0.26212742924690247, 'timestamp': '2025-10-01 04:18:25.514874', 'step': 1982, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:25.567529', 'step': 1982, 'epoch': 1} {'type': 'loss', 'content': 0.23106540739536285, 'timestamp': '2025-10-01 04:18:25.574899', 'step': 1983, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:25.635451', 'step': 1983, 'epoch': 1} {'type': 'loss', 'content': 0.24943755567073822, 'timestamp': '2025-10-01 04:18:25.641317', 'step': 1984, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:25.694332', 'step': 1984, 'epoch': 1} {'type': 'loss', 'content': 0.15676353871822357, 'timestamp': '2025-10-01 04:18:25.696502', 'step': 1985, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:25.751020', 'step': 1985, 'epoch': 1} {'type': 'loss', 'content': 0.14043046534061432, 'timestamp': '2025-10-01 04:18:25.753038', 'step': 1986, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:25.810002', 'step': 1986, 'epoch': 1} {'type': 'loss', 'content': 0.23489488661289215, 'timestamp': '2025-10-01 04:18:25.812574', 'step': 1987, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:25.868040', 'step': 1987, 'epoch': 1} {'type': 'loss', 'content': 0.23504997789859772, 'timestamp': '2025-10-01 04:18:25.873896', 'step': 1988, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:25.926946', 'step': 1988, 'epoch': 1} {'type': 'loss', 'content': 0.2860682010650635, 'timestamp': '2025-10-01 04:18:25.929605', 'step': 1989, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:26.015155', 'step': 1989, 'epoch': 1} {'type': 'loss', 'content': 0.1503145843744278, 'timestamp': '2025-10-01 04:18:26.017972', 'step': 1990, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:18:26.081186', 'step': 1990, 'epoch': 1} {'type': 'loss', 'content': 0.16617392003536224, 'timestamp': '2025-10-01 04:18:26.083025', 'step': 1991, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:26.144241', 'step': 1991, 'epoch': 1} {'type': 'loss', 'content': 0.17199254035949707, 'timestamp': '2025-10-01 04:18:26.150504', 'step': 1992, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:26.205573', 'step': 1992, 'epoch': 1} {'type': 'loss', 'content': 0.16827906668186188, 'timestamp': '2025-10-01 04:18:26.208056', 'step': 1993, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:26.261279', 'step': 1993, 'epoch': 1} {'type': 'loss', 'content': 0.19565477967262268, 'timestamp': '2025-10-01 04:18:26.263262', 'step': 1994, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:26.316988', 'step': 1994, 'epoch': 1} {'type': 'loss', 'content': 0.2287626564502716, 'timestamp': '2025-10-01 04:18:26.319151', 'step': 1995, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:26.372710', 'step': 1995, 'epoch': 1} {'type': 'loss', 'content': 0.18217715620994568, 'timestamp': '2025-10-01 04:18:26.378617', 'step': 1996, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:18:26.431490', 'step': 1996, 'epoch': 1} {'type': 'loss', 'content': 0.20922419428825378, 'timestamp': '2025-10-01 04:18:26.434321', 'step': 1997, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:26.487756', 'step': 1997, 'epoch': 1} {'type': 'loss', 'content': 0.2568192183971405, 'timestamp': '2025-10-01 04:18:26.489838', 'step': 1998, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:26.559476', 'step': 1998, 'epoch': 1} {'type': 'loss', 'content': 0.16223789751529694, 'timestamp': '2025-10-01 04:18:26.561686', 'step': 1999, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:26.625806', 'step': 1999, 'epoch': 1} {'type': 'loss', 'content': 0.17177142202854156, 'timestamp': '2025-10-01 04:18:26.631715', 'step': 2000, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 2000', 'timestamp': '2025-10-01 04:18:27.022172', 'step': 2000, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:27.079487', 'step': 2000, 'epoch': 1} {'type': 'loss', 'content': 0.1373666524887085, 'timestamp': '2025-10-01 04:18:27.085943', 'step': 2001, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:27.159335', 'step': 2001, 'epoch': 1} {'type': 'loss', 'content': 0.22376114130020142, 'timestamp': '2025-10-01 04:18:27.161582', 'step': 2002, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:27.216282', 'step': 2002, 'epoch': 1} {'type': 'loss', 'content': 0.1858169287443161, 'timestamp': '2025-10-01 04:18:27.218813', 'step': 2003, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:18:27.272875', 'step': 2003, 'epoch': 1} {'type': 'loss', 'content': 0.22117525339126587, 'timestamp': '2025-10-01 04:18:27.278249', 'step': 2004, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:27.330998', 'step': 2004, 'epoch': 1} {'type': 'loss', 'content': 0.13118818402290344, 'timestamp': '2025-10-01 04:18:27.335725', 'step': 2005, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:27.392378', 'step': 2005, 'epoch': 1} {'type': 'loss', 'content': 0.16267195343971252, 'timestamp': '2025-10-01 04:18:27.394263', 'step': 2006, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:18:27.449244', 'step': 2006, 'epoch': 1} {'type': 'loss', 'content': 0.22649149596691132, 'timestamp': '2025-10-01 04:18:27.451321', 'step': 2007, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:18:27.506160', 'step': 2007, 'epoch': 1} {'type': 'loss', 'content': 0.1836780309677124, 'timestamp': '2025-10-01 04:18:27.513167', 'step': 2008, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:27.568775', 'step': 2008, 'epoch': 1} {'type': 'loss', 'content': 0.14153070747852325, 'timestamp': '2025-10-01 04:18:27.584542', 'step': 2009, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:27.662648', 'step': 2009, 'epoch': 1} {'type': 'loss', 'content': 0.19693931937217712, 'timestamp': '2025-10-01 04:18:27.664455', 'step': 2010, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:27.721561', 'step': 2010, 'epoch': 1} {'type': 'loss', 'content': 0.10869264602661133, 'timestamp': '2025-10-01 04:18:27.723662', 'step': 2011, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:27.779390', 'step': 2011, 'epoch': 1} {'type': 'loss', 'content': 0.16118158400058746, 'timestamp': '2025-10-01 04:18:27.785958', 'step': 2012, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:18:27.853139', 'step': 2012, 'epoch': 1} {'type': 'loss', 'content': 0.2752498686313629, 'timestamp': '2025-10-01 04:18:27.863607', 'step': 2013, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:27.919145', 'step': 2013, 'epoch': 1} {'type': 'loss', 'content': 0.22596509754657745, 'timestamp': '2025-10-01 04:18:27.921141', 'step': 2014, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:27.979469', 'step': 2014, 'epoch': 1} {'type': 'loss', 'content': 0.13654251396656036, 'timestamp': '2025-10-01 04:18:27.981599', 'step': 2015, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:28.038860', 'step': 2015, 'epoch': 1} {'type': 'loss', 'content': 0.12731465697288513, 'timestamp': '2025-10-01 04:18:28.045858', 'step': 2016, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:28.101992', 'step': 2016, 'epoch': 1} {'type': 'loss', 'content': 0.1158311739563942, 'timestamp': '2025-10-01 04:18:28.103848', 'step': 2017, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:18:28.163259', 'step': 2017, 'epoch': 1} {'type': 'loss', 'content': 0.20439372956752777, 'timestamp': '2025-10-01 04:18:28.165161', 'step': 2018, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:28.227782', 'step': 2018, 'epoch': 1} {'type': 'loss', 'content': 0.17258471250534058, 'timestamp': '2025-10-01 04:18:28.229550', 'step': 2019, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:28.289997', 'step': 2019, 'epoch': 1} {'type': 'loss', 'content': 0.2748343348503113, 'timestamp': '2025-10-01 04:18:28.297025', 'step': 2020, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:28.353359', 'step': 2020, 'epoch': 1} {'type': 'loss', 'content': 0.16856332123279572, 'timestamp': '2025-10-01 04:18:28.355572', 'step': 2021, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:28.411265', 'step': 2021, 'epoch': 1} {'type': 'loss', 'content': 0.16499440371990204, 'timestamp': '2025-10-01 04:18:28.413454', 'step': 2022, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:28.468870', 'step': 2022, 'epoch': 1} {'type': 'loss', 'content': 0.23506000638008118, 'timestamp': '2025-10-01 04:18:28.470851', 'step': 2023, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:28.525200', 'step': 2023, 'epoch': 1} {'type': 'loss', 'content': 0.15906839072704315, 'timestamp': '2025-10-01 04:18:28.531006', 'step': 2024, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:18:28.584229', 'step': 2024, 'epoch': 1} {'type': 'loss', 'content': 0.22939319908618927, 'timestamp': '2025-10-01 04:18:28.586498', 'step': 2025, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:18:28.641226', 'step': 2025, 'epoch': 1} {'type': 'loss', 'content': 0.2196374386548996, 'timestamp': '2025-10-01 04:18:28.643494', 'step': 2026, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:28.697623', 'step': 2026, 'epoch': 1} {'type': 'loss', 'content': 0.17880268394947052, 'timestamp': '2025-10-01 04:18:28.700329', 'step': 2027, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:28.754956', 'step': 2027, 'epoch': 1} {'type': 'loss', 'content': 0.1385248452425003, 'timestamp': '2025-10-01 04:18:28.760835', 'step': 2028, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:28.815004', 'step': 2028, 'epoch': 1} {'type': 'loss', 'content': 0.16389955580234528, 'timestamp': '2025-10-01 04:18:28.817390', 'step': 2029, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:28.871760', 'step': 2029, 'epoch': 1} {'type': 'loss', 'content': 0.16918861865997314, 'timestamp': '2025-10-01 04:18:28.874900', 'step': 2030, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:28.930828', 'step': 2030, 'epoch': 1} {'type': 'loss', 'content': 0.11840073764324188, 'timestamp': '2025-10-01 04:18:28.933455', 'step': 2031, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:28.989032', 'step': 2031, 'epoch': 1} {'type': 'loss', 'content': 0.142182856798172, 'timestamp': '2025-10-01 04:18:28.995034', 'step': 2032, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:29.049497', 'step': 2032, 'epoch': 1} {'type': 'loss', 'content': 0.2780693471431732, 'timestamp': '2025-10-01 04:18:29.051973', 'step': 2033, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:29.108003', 'step': 2033, 'epoch': 1} {'type': 'loss', 'content': 0.1466999053955078, 'timestamp': '2025-10-01 04:18:29.110523', 'step': 2034, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:29.165176', 'step': 2034, 'epoch': 1} {'type': 'loss', 'content': 0.09443115442991257, 'timestamp': '2025-10-01 04:18:29.172263', 'step': 2035, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:29.227326', 'step': 2035, 'epoch': 1} {'type': 'loss', 'content': 0.23210719227790833, 'timestamp': '2025-10-01 04:18:29.234506', 'step': 2036, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:29.290229', 'step': 2036, 'epoch': 1} {'type': 'loss', 'content': 0.16346615552902222, 'timestamp': '2025-10-01 04:18:29.292624', 'step': 2037, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:29.347160', 'step': 2037, 'epoch': 1} {'type': 'loss', 'content': 0.27371716499328613, 'timestamp': '2025-10-01 04:18:29.349578', 'step': 2038, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:29.404352', 'step': 2038, 'epoch': 1} {'type': 'loss', 'content': 0.21937625110149384, 'timestamp': '2025-10-01 04:18:29.406541', 'step': 2039, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:29.462655', 'step': 2039, 'epoch': 1} {'type': 'loss', 'content': 0.1369217187166214, 'timestamp': '2025-10-01 04:18:29.469005', 'step': 2040, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:29.527848', 'step': 2040, 'epoch': 1} {'type': 'loss', 'content': 0.17134685814380646, 'timestamp': '2025-10-01 04:18:29.529979', 'step': 2041, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:29.589902', 'step': 2041, 'epoch': 1} {'type': 'loss', 'content': 0.1368628889322281, 'timestamp': '2025-10-01 04:18:29.592612', 'step': 2042, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:29.651802', 'step': 2042, 'epoch': 1} {'type': 'loss', 'content': 0.23372407257556915, 'timestamp': '2025-10-01 04:18:29.654300', 'step': 2043, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:18:29.714271', 'step': 2043, 'epoch': 1} {'type': 'loss', 'content': 0.11410317569971085, 'timestamp': '2025-10-01 04:18:29.721120', 'step': 2044, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:18:29.779872', 'step': 2044, 'epoch': 1} {'type': 'loss', 'content': 0.19557535648345947, 'timestamp': '2025-10-01 04:18:29.782076', 'step': 2045, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:29.840345', 'step': 2045, 'epoch': 1} {'type': 'loss', 'content': 0.0921483114361763, 'timestamp': '2025-10-01 04:18:29.842646', 'step': 2046, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:29.898808', 'step': 2046, 'epoch': 1} {'type': 'loss', 'content': 0.16965457797050476, 'timestamp': '2025-10-01 04:18:29.901122', 'step': 2047, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:29.958385', 'step': 2047, 'epoch': 1} {'type': 'loss', 'content': 0.22695323824882507, 'timestamp': '2025-10-01 04:18:29.965078', 'step': 2048, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:30.021633', 'step': 2048, 'epoch': 1} {'type': 'loss', 'content': 0.12252583354711533, 'timestamp': '2025-10-01 04:18:30.024010', 'step': 2049, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:30.081580', 'step': 2049, 'epoch': 1} {'type': 'loss', 'content': 0.17524118721485138, 'timestamp': '2025-10-01 04:18:30.084115', 'step': 2050, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:30.142282', 'step': 2050, 'epoch': 1} {'type': 'loss', 'content': 0.212743878364563, 'timestamp': '2025-10-01 04:18:30.153162', 'step': 2051, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:18:30.211921', 'step': 2051, 'epoch': 1} {'type': 'loss', 'content': 0.17792993783950806, 'timestamp': '2025-10-01 04:18:30.218768', 'step': 2052, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:30.274729', 'step': 2052, 'epoch': 1} {'type': 'loss', 'content': 0.2325729876756668, 'timestamp': '2025-10-01 04:18:30.276618', 'step': 2053, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:18:30.330774', 'step': 2053, 'epoch': 1} {'type': 'loss', 'content': 0.16504879295825958, 'timestamp': '2025-10-01 04:18:30.332770', 'step': 2054, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:30.385700', 'step': 2054, 'epoch': 1} {'type': 'loss', 'content': 0.11872455477714539, 'timestamp': '2025-10-01 04:18:30.387821', 'step': 2055, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:30.441481', 'step': 2055, 'epoch': 1} {'type': 'loss', 'content': 0.11927788704633713, 'timestamp': '2025-10-01 04:18:30.447449', 'step': 2056, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:30.499704', 'step': 2056, 'epoch': 1} {'type': 'loss', 'content': 0.1281716227531433, 'timestamp': '2025-10-01 04:18:30.501762', 'step': 2057, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:18:30.554165', 'step': 2057, 'epoch': 1} {'type': 'loss', 'content': 0.22070668637752533, 'timestamp': '2025-10-01 04:18:30.556380', 'step': 2058, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:30.610002', 'step': 2058, 'epoch': 1} {'type': 'loss', 'content': 0.12400148063898087, 'timestamp': '2025-10-01 04:18:30.612267', 'step': 2059, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:30.665361', 'step': 2059, 'epoch': 1} {'type': 'loss', 'content': 0.13352002203464508, 'timestamp': '2025-10-01 04:18:30.670822', 'step': 2060, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:30.723106', 'step': 2060, 'epoch': 1} {'type': 'loss', 'content': 0.1393681913614273, 'timestamp': '2025-10-01 04:18:30.725124', 'step': 2061, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:30.778486', 'step': 2061, 'epoch': 1} {'type': 'loss', 'content': 0.1363399773836136, 'timestamp': '2025-10-01 04:18:30.780170', 'step': 2062, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:30.832926', 'step': 2062, 'epoch': 1} {'type': 'loss', 'content': 0.19205717742443085, 'timestamp': '2025-10-01 04:18:30.835249', 'step': 2063, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:30.888451', 'step': 2063, 'epoch': 1} {'type': 'loss', 'content': 0.2382403463125229, 'timestamp': '2025-10-01 04:18:30.894802', 'step': 2064, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:18:30.947762', 'step': 2064, 'epoch': 1} {'type': 'loss', 'content': 0.18126963078975677, 'timestamp': '2025-10-01 04:18:30.949837', 'step': 2065, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:18:31.002559', 'step': 2065, 'epoch': 1} {'type': 'loss', 'content': 0.1503070443868637, 'timestamp': '2025-10-01 04:18:31.004768', 'step': 2066, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:31.058773', 'step': 2066, 'epoch': 1} {'type': 'loss', 'content': 0.17299765348434448, 'timestamp': '2025-10-01 04:18:31.060921', 'step': 2067, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:31.114268', 'step': 2067, 'epoch': 1} {'type': 'loss', 'content': 0.151666060090065, 'timestamp': '2025-10-01 04:18:31.120888', 'step': 2068, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:31.174011', 'step': 2068, 'epoch': 1} {'type': 'loss', 'content': 0.23215557634830475, 'timestamp': '2025-10-01 04:18:31.176300', 'step': 2069, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:31.229773', 'step': 2069, 'epoch': 1} {'type': 'loss', 'content': 0.16491422057151794, 'timestamp': '2025-10-01 04:18:31.232392', 'step': 2070, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:31.286850', 'step': 2070, 'epoch': 1} {'type': 'loss', 'content': 0.2266463190317154, 'timestamp': '2025-10-01 04:18:31.290387', 'step': 2071, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:31.348169', 'step': 2071, 'epoch': 1} {'type': 'loss', 'content': 0.20664162933826447, 'timestamp': '2025-10-01 04:18:31.354778', 'step': 2072, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:31.408375', 'step': 2072, 'epoch': 1} {'type': 'loss', 'content': 0.14248952269554138, 'timestamp': '2025-10-01 04:18:31.410552', 'step': 2073, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:18:31.464281', 'step': 2073, 'epoch': 1} {'type': 'loss', 'content': 0.1617041379213333, 'timestamp': '2025-10-01 04:18:31.467654', 'step': 2074, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:31.525390', 'step': 2074, 'epoch': 1} {'type': 'loss', 'content': 0.12148896604776382, 'timestamp': '2025-10-01 04:18:31.539188', 'step': 2075, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:31.592979', 'step': 2075, 'epoch': 1} {'type': 'loss', 'content': 0.13137216866016388, 'timestamp': '2025-10-01 04:18:31.598439', 'step': 2076, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:31.651089', 'step': 2076, 'epoch': 1} {'type': 'loss', 'content': 0.3118421733379364, 'timestamp': '2025-10-01 04:18:31.653457', 'step': 2077, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:31.706234', 'step': 2077, 'epoch': 1} {'type': 'loss', 'content': 0.21761342883110046, 'timestamp': '2025-10-01 04:18:31.708222', 'step': 2078, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:18:31.761248', 'step': 2078, 'epoch': 1} {'type': 'loss', 'content': 0.12417208403348923, 'timestamp': '2025-10-01 04:18:31.763669', 'step': 2079, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:31.818417', 'step': 2079, 'epoch': 1} {'type': 'loss', 'content': 0.19715666770935059, 'timestamp': '2025-10-01 04:18:31.824181', 'step': 2080, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:31.890869', 'step': 2080, 'epoch': 1} {'type': 'loss', 'content': 0.1887834221124649, 'timestamp': '2025-10-01 04:18:31.892925', 'step': 2081, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:31.947704', 'step': 2081, 'epoch': 1} {'type': 'loss', 'content': 0.13235054910182953, 'timestamp': '2025-10-01 04:18:31.949866', 'step': 2082, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:32.002592', 'step': 2082, 'epoch': 1} {'type': 'loss', 'content': 0.1449703872203827, 'timestamp': '2025-10-01 04:18:32.004652', 'step': 2083, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:32.058582', 'step': 2083, 'epoch': 1} {'type': 'loss', 'content': 0.13621608912944794, 'timestamp': '2025-10-01 04:18:32.064014', 'step': 2084, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:32.120855', 'step': 2084, 'epoch': 1} {'type': 'loss', 'content': 0.1364871859550476, 'timestamp': '2025-10-01 04:18:32.123316', 'step': 2085, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:32.180573', 'step': 2085, 'epoch': 1} {'type': 'loss', 'content': 0.14406806230545044, 'timestamp': '2025-10-01 04:18:32.182493', 'step': 2086, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:32.235327', 'step': 2086, 'epoch': 1} {'type': 'loss', 'content': 0.22966009378433228, 'timestamp': '2025-10-01 04:18:32.237562', 'step': 2087, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:32.291020', 'step': 2087, 'epoch': 1} {'type': 'loss', 'content': 0.12253624200820923, 'timestamp': '2025-10-01 04:18:32.297333', 'step': 2088, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:32.352403', 'step': 2088, 'epoch': 1} {'type': 'loss', 'content': 0.17447029054164886, 'timestamp': '2025-10-01 04:18:32.354634', 'step': 2089, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:32.408298', 'step': 2089, 'epoch': 1} {'type': 'loss', 'content': 0.12834760546684265, 'timestamp': '2025-10-01 04:18:32.410534', 'step': 2090, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:32.463182', 'step': 2090, 'epoch': 1} {'type': 'loss', 'content': 0.21524839103221893, 'timestamp': '2025-10-01 04:18:32.465422', 'step': 2091, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:32.520961', 'step': 2091, 'epoch': 1} {'type': 'loss', 'content': 0.21538282930850983, 'timestamp': '2025-10-01 04:18:32.527767', 'step': 2092, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:32.591399', 'step': 2092, 'epoch': 1} {'type': 'loss', 'content': 0.2788374423980713, 'timestamp': '2025-10-01 04:18:32.593569', 'step': 2093, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:32.646499', 'step': 2093, 'epoch': 1} {'type': 'loss', 'content': 0.18439342081546783, 'timestamp': '2025-10-01 04:18:32.648796', 'step': 2094, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:32.714277', 'step': 2094, 'epoch': 1} {'type': 'loss', 'content': 0.13947761058807373, 'timestamp': '2025-10-01 04:18:32.716376', 'step': 2095, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:32.770326', 'step': 2095, 'epoch': 1} {'type': 'loss', 'content': 0.11094380170106888, 'timestamp': '2025-10-01 04:18:32.776383', 'step': 2096, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:32.828962', 'step': 2096, 'epoch': 1} {'type': 'loss', 'content': 0.18448446691036224, 'timestamp': '2025-10-01 04:18:32.831457', 'step': 2097, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:32.884673', 'step': 2097, 'epoch': 1} {'type': 'loss', 'content': 0.2623681426048279, 'timestamp': '2025-10-01 04:18:32.888199', 'step': 2098, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:32.941435', 'step': 2098, 'epoch': 1} {'type': 'loss', 'content': 0.19337743520736694, 'timestamp': '2025-10-01 04:18:32.943518', 'step': 2099, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:32.998120', 'step': 2099, 'epoch': 1} {'type': 'loss', 'content': 0.15706975758075714, 'timestamp': '2025-10-01 04:18:33.003885', 'step': 2100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:33.056480', 'step': 2100, 'epoch': 1} {'type': 'loss', 'content': 0.16980034112930298, 'timestamp': '2025-10-01 04:18:33.058727', 'step': 2101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:33.112912', 'step': 2101, 'epoch': 1} {'type': 'loss', 'content': 0.239715576171875, 'timestamp': '2025-10-01 04:18:33.115006', 'step': 2102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:18:33.174246', 'step': 2102, 'epoch': 1} {'type': 'loss', 'content': 0.18097999691963196, 'timestamp': '2025-10-01 04:18:33.176291', 'step': 2103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:18:33.236069', 'step': 2103, 'epoch': 1} {'type': 'loss', 'content': 0.17355209589004517, 'timestamp': '2025-10-01 04:18:33.244949', 'step': 2104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:33.300288', 'step': 2104, 'epoch': 1} {'type': 'loss', 'content': 0.15567760169506073, 'timestamp': '2025-10-01 04:18:33.302174', 'step': 2105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:33.354999', 'step': 2105, 'epoch': 1} {'type': 'loss', 'content': 0.19572289288043976, 'timestamp': '2025-10-01 04:18:33.357145', 'step': 2106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:33.412677', 'step': 2106, 'epoch': 1} {'type': 'loss', 'content': 0.2063552737236023, 'timestamp': '2025-10-01 04:18:33.418335', 'step': 2107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:18:33.471784', 'step': 2107, 'epoch': 1} {'type': 'loss', 'content': 0.19669829308986664, 'timestamp': '2025-10-01 04:18:33.477351', 'step': 2108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:33.533526', 'step': 2108, 'epoch': 1} {'type': 'loss', 'content': 0.19444499909877777, 'timestamp': '2025-10-01 04:18:33.535665', 'step': 2109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:33.589239', 'step': 2109, 'epoch': 1} {'type': 'loss', 'content': 0.12428995221853256, 'timestamp': '2025-10-01 04:18:33.591692', 'step': 2110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:33.645604', 'step': 2110, 'epoch': 1} {'type': 'loss', 'content': 0.23223379254341125, 'timestamp': '2025-10-01 04:18:33.647884', 'step': 2111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:33.700349', 'step': 2111, 'epoch': 1} {'type': 'loss', 'content': 0.16265840828418732, 'timestamp': '2025-10-01 04:18:33.706010', 'step': 2112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:33.760258', 'step': 2112, 'epoch': 1} {'type': 'loss', 'content': 0.1477583348751068, 'timestamp': '2025-10-01 04:18:33.762559', 'step': 2113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:33.817122', 'step': 2113, 'epoch': 1} {'type': 'loss', 'content': 0.1548265963792801, 'timestamp': '2025-10-01 04:18:33.819423', 'step': 2114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:18:33.873130', 'step': 2114, 'epoch': 1} {'type': 'loss', 'content': 0.30392032861709595, 'timestamp': '2025-10-01 04:18:33.875432', 'step': 2115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:33.928296', 'step': 2115, 'epoch': 1} {'type': 'loss', 'content': 0.24548238515853882, 'timestamp': '2025-10-01 04:18:33.933832', 'step': 2116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:18:33.986319', 'step': 2116, 'epoch': 1} {'type': 'loss', 'content': 0.2111784666776657, 'timestamp': '2025-10-01 04:18:33.988691', 'step': 2117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:34.042187', 'step': 2117, 'epoch': 1} {'type': 'loss', 'content': 0.15307316184043884, 'timestamp': '2025-10-01 04:18:34.044234', 'step': 2118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:34.098071', 'step': 2118, 'epoch': 1} {'type': 'loss', 'content': 0.23467852175235748, 'timestamp': '2025-10-01 04:18:34.099914', 'step': 2119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:34.152827', 'step': 2119, 'epoch': 1} {'type': 'loss', 'content': 0.18984588980674744, 'timestamp': '2025-10-01 04:18:34.158604', 'step': 2120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:34.220883', 'step': 2120, 'epoch': 1} {'type': 'loss', 'content': 0.13705967366695404, 'timestamp': '2025-10-01 04:18:34.222735', 'step': 2121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:18:34.276097', 'step': 2121, 'epoch': 1} {'type': 'loss', 'content': 0.1872337907552719, 'timestamp': '2025-10-01 04:18:34.278228', 'step': 2122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:34.331163', 'step': 2122, 'epoch': 1} {'type': 'loss', 'content': 0.17198605835437775, 'timestamp': '2025-10-01 04:18:34.333324', 'step': 2123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:34.386097', 'step': 2123, 'epoch': 1} {'type': 'loss', 'content': 0.16401943564414978, 'timestamp': '2025-10-01 04:18:34.391660', 'step': 2124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:34.444615', 'step': 2124, 'epoch': 1} {'type': 'loss', 'content': 0.1678636074066162, 'timestamp': '2025-10-01 04:18:34.446755', 'step': 2125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:34.499825', 'step': 2125, 'epoch': 1} {'type': 'loss', 'content': 0.1659349799156189, 'timestamp': '2025-10-01 04:18:34.501982', 'step': 2126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:34.563572', 'step': 2126, 'epoch': 1} {'type': 'loss', 'content': 0.13329899311065674, 'timestamp': '2025-10-01 04:18:34.565638', 'step': 2127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:34.619032', 'step': 2127, 'epoch': 1} {'type': 'loss', 'content': 0.2992013692855835, 'timestamp': '2025-10-01 04:18:34.624644', 'step': 2128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:18:34.677699', 'step': 2128, 'epoch': 1} {'type': 'loss', 'content': 0.26572519540786743, 'timestamp': '2025-10-01 04:18:34.679668', 'step': 2129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:34.732607', 'step': 2129, 'epoch': 1} {'type': 'loss', 'content': 0.17339996993541718, 'timestamp': '2025-10-01 04:18:34.735140', 'step': 2130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:34.792601', 'step': 2130, 'epoch': 1} {'type': 'loss', 'content': 0.16620518267154694, 'timestamp': '2025-10-01 04:18:34.794774', 'step': 2131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:34.848808', 'step': 2131, 'epoch': 1} {'type': 'loss', 'content': 0.21503758430480957, 'timestamp': '2025-10-01 04:18:34.854974', 'step': 2132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:18:34.908456', 'step': 2132, 'epoch': 1} {'type': 'loss', 'content': 0.1309366524219513, 'timestamp': '2025-10-01 04:18:34.910751', 'step': 2133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:34.967528', 'step': 2133, 'epoch': 1} {'type': 'loss', 'content': 0.12149318307638168, 'timestamp': '2025-10-01 04:18:34.969735', 'step': 2134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:18:35.023325', 'step': 2134, 'epoch': 1} {'type': 'loss', 'content': 0.3509669601917267, 'timestamp': '2025-10-01 04:18:35.025385', 'step': 2135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:35.078626', 'step': 2135, 'epoch': 1} {'type': 'loss', 'content': 0.10988656431436539, 'timestamp': '2025-10-01 04:18:35.094067', 'step': 2136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:35.146721', 'step': 2136, 'epoch': 1} {'type': 'loss', 'content': 0.17582137882709503, 'timestamp': '2025-10-01 04:18:35.148815', 'step': 2137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:35.203179', 'step': 2137, 'epoch': 1} {'type': 'loss', 'content': 0.21964512765407562, 'timestamp': '2025-10-01 04:18:35.205287', 'step': 2138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:35.259620', 'step': 2138, 'epoch': 1} {'type': 'loss', 'content': 0.10186038166284561, 'timestamp': '2025-10-01 04:18:35.264387', 'step': 2139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:35.317321', 'step': 2139, 'epoch': 1} {'type': 'loss', 'content': 0.19687716662883759, 'timestamp': '2025-10-01 04:18:35.323186', 'step': 2140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:35.376804', 'step': 2140, 'epoch': 1} {'type': 'loss', 'content': 0.1657821089029312, 'timestamp': '2025-10-01 04:18:35.379061', 'step': 2141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:35.431908', 'step': 2141, 'epoch': 1} {'type': 'loss', 'content': 0.13138125836849213, 'timestamp': '2025-10-01 04:18:35.434037', 'step': 2142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:18:35.497717', 'step': 2142, 'epoch': 1} {'type': 'loss', 'content': 0.17399999499320984, 'timestamp': '2025-10-01 04:18:35.499801', 'step': 2143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:35.553149', 'step': 2143, 'epoch': 1} {'type': 'loss', 'content': 0.17403466999530792, 'timestamp': '2025-10-01 04:18:35.560067', 'step': 2144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:35.617088', 'step': 2144, 'epoch': 1} {'type': 'loss', 'content': 0.16460579633712769, 'timestamp': '2025-10-01 04:18:35.619180', 'step': 2145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:35.673148', 'step': 2145, 'epoch': 1} {'type': 'loss', 'content': 0.16638629138469696, 'timestamp': '2025-10-01 04:18:35.675909', 'step': 2146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:35.729285', 'step': 2146, 'epoch': 1} {'type': 'loss', 'content': 0.1326819807291031, 'timestamp': '2025-10-01 04:18:35.731835', 'step': 2147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:35.794862', 'step': 2147, 'epoch': 1} {'type': 'loss', 'content': 0.1995624303817749, 'timestamp': '2025-10-01 04:18:35.800472', 'step': 2148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:35.853453', 'step': 2148, 'epoch': 1} {'type': 'loss', 'content': 0.16059406101703644, 'timestamp': '2025-10-01 04:18:35.856007', 'step': 2149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:35.909054', 'step': 2149, 'epoch': 1} {'type': 'loss', 'content': 0.1217949315905571, 'timestamp': '2025-10-01 04:18:35.911096', 'step': 2150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:35.964768', 'step': 2150, 'epoch': 1} {'type': 'loss', 'content': 0.2525765895843506, 'timestamp': '2025-10-01 04:18:35.967169', 'step': 2151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:36.020196', 'step': 2151, 'epoch': 1} {'type': 'loss', 'content': 0.1536756455898285, 'timestamp': '2025-10-01 04:18:36.025818', 'step': 2152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:18:36.078757', 'step': 2152, 'epoch': 1} {'type': 'loss', 'content': 0.10763529688119888, 'timestamp': '2025-10-01 04:18:36.080824', 'step': 2153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:36.134373', 'step': 2153, 'epoch': 1} {'type': 'loss', 'content': 0.14498376846313477, 'timestamp': '2025-10-01 04:18:36.136872', 'step': 2154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:36.189668', 'step': 2154, 'epoch': 1} {'type': 'loss', 'content': 0.15529319643974304, 'timestamp': '2025-10-01 04:18:36.191837', 'step': 2155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:36.245313', 'step': 2155, 'epoch': 1} {'type': 'loss', 'content': 0.2298041135072708, 'timestamp': '2025-10-01 04:18:36.251006', 'step': 2156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:36.303974', 'step': 2156, 'epoch': 1} {'type': 'loss', 'content': 0.23219642043113708, 'timestamp': '2025-10-01 04:18:36.306420', 'step': 2157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:18:36.362096', 'step': 2157, 'epoch': 1} {'type': 'loss', 'content': 0.15002016723155975, 'timestamp': '2025-10-01 04:18:36.364318', 'step': 2158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:36.417887', 'step': 2158, 'epoch': 1} {'type': 'loss', 'content': 0.24827294051647186, 'timestamp': '2025-10-01 04:18:36.419993', 'step': 2159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:36.473615', 'step': 2159, 'epoch': 1} {'type': 'loss', 'content': 0.15617747604846954, 'timestamp': '2025-10-01 04:18:36.479315', 'step': 2160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:36.533296', 'step': 2160, 'epoch': 1} {'type': 'loss', 'content': 0.1602381467819214, 'timestamp': '2025-10-01 04:18:36.535721', 'step': 2161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:36.590778', 'step': 2161, 'epoch': 1} {'type': 'loss', 'content': 0.23726266622543335, 'timestamp': '2025-10-01 04:18:36.592983', 'step': 2162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:36.646268', 'step': 2162, 'epoch': 1} {'type': 'loss', 'content': 0.22688980400562286, 'timestamp': '2025-10-01 04:18:36.648893', 'step': 2163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:36.702639', 'step': 2163, 'epoch': 1} {'type': 'loss', 'content': 0.21513091027736664, 'timestamp': '2025-10-01 04:18:36.721795', 'step': 2164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:36.776267', 'step': 2164, 'epoch': 1} {'type': 'loss', 'content': 0.18497979640960693, 'timestamp': '2025-10-01 04:18:36.778586', 'step': 2165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:36.845808', 'step': 2165, 'epoch': 1} {'type': 'loss', 'content': 0.13621309399604797, 'timestamp': '2025-10-01 04:18:36.850161', 'step': 2166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:36.904639', 'step': 2166, 'epoch': 1} {'type': 'loss', 'content': 0.16647765040397644, 'timestamp': '2025-10-01 04:18:36.907326', 'step': 2167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:36.962320', 'step': 2167, 'epoch': 1} {'type': 'loss', 'content': 0.26952067017555237, 'timestamp': '2025-10-01 04:18:36.979975', 'step': 2168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:37.033410', 'step': 2168, 'epoch': 1} {'type': 'loss', 'content': 0.21406234800815582, 'timestamp': '2025-10-01 04:18:37.036550', 'step': 2169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:18:37.090888', 'step': 2169, 'epoch': 1} {'type': 'loss', 'content': 0.16090458631515503, 'timestamp': '2025-10-01 04:18:37.099287', 'step': 2170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:37.156546', 'step': 2170, 'epoch': 1} {'type': 'loss', 'content': 0.23747293651103973, 'timestamp': '2025-10-01 04:18:37.158823', 'step': 2171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:37.218438', 'step': 2171, 'epoch': 1} {'type': 'loss', 'content': 0.2915083169937134, 'timestamp': '2025-10-01 04:18:37.224759', 'step': 2172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:37.278893', 'step': 2172, 'epoch': 1} {'type': 'loss', 'content': 0.20688436925411224, 'timestamp': '2025-10-01 04:18:37.281530', 'step': 2173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:37.350591', 'step': 2173, 'epoch': 1} {'type': 'loss', 'content': 0.2121579796075821, 'timestamp': '2025-10-01 04:18:37.352914', 'step': 2174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:37.407075', 'step': 2174, 'epoch': 1} {'type': 'loss', 'content': 0.2513326108455658, 'timestamp': '2025-10-01 04:18:37.409617', 'step': 2175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:18:37.464403', 'step': 2175, 'epoch': 1} {'type': 'loss', 'content': 0.18675349652767181, 'timestamp': '2025-10-01 04:18:37.470534', 'step': 2176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:37.524440', 'step': 2176, 'epoch': 1} {'type': 'loss', 'content': 0.14461299777030945, 'timestamp': '2025-10-01 04:18:37.531260', 'step': 2177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:37.609396', 'step': 2177, 'epoch': 1} {'type': 'loss', 'content': 0.11709270626306534, 'timestamp': '2025-10-01 04:18:37.611885', 'step': 2178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:37.666437', 'step': 2178, 'epoch': 1} {'type': 'loss', 'content': 0.11807754635810852, 'timestamp': '2025-10-01 04:18:37.669556', 'step': 2179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:37.723881', 'step': 2179, 'epoch': 1} {'type': 'loss', 'content': 0.22686432301998138, 'timestamp': '2025-10-01 04:18:37.730253', 'step': 2180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:37.784047', 'step': 2180, 'epoch': 1} {'type': 'loss', 'content': 0.17169979214668274, 'timestamp': '2025-10-01 04:18:37.786458', 'step': 2181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:37.840336', 'step': 2181, 'epoch': 1} {'type': 'loss', 'content': 0.2929593324661255, 'timestamp': '2025-10-01 04:18:37.861742', 'step': 2182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:37.935484', 'step': 2182, 'epoch': 1} {'type': 'loss', 'content': 0.11504296213388443, 'timestamp': '2025-10-01 04:18:37.944908', 'step': 2183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:38.038813', 'step': 2183, 'epoch': 1} {'type': 'loss', 'content': 0.2504780888557434, 'timestamp': '2025-10-01 04:18:38.057906', 'step': 2184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:38.181294', 'step': 2184, 'epoch': 1} {'type': 'loss', 'content': 0.2608591318130493, 'timestamp': '2025-10-01 04:18:38.193996', 'step': 2185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:38.320732', 'step': 2185, 'epoch': 1} {'type': 'loss', 'content': 0.2086717039346695, 'timestamp': '2025-10-01 04:18:38.343859', 'step': 2186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:38.450910', 'step': 2186, 'epoch': 1} {'type': 'loss', 'content': 0.15313224494457245, 'timestamp': '2025-10-01 04:18:38.464727', 'step': 2187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:38.546992', 'step': 2187, 'epoch': 1} {'type': 'loss', 'content': 0.2624874711036682, 'timestamp': '2025-10-01 04:18:38.567391', 'step': 2188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:38.664952', 'step': 2188, 'epoch': 1} {'type': 'loss', 'content': 0.16685150563716888, 'timestamp': '2025-10-01 04:18:38.672310', 'step': 2189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:38.764557', 'step': 2189, 'epoch': 1} {'type': 'loss', 'content': 0.1531715840101242, 'timestamp': '2025-10-01 04:18:38.780018', 'step': 2190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:18:38.857291', 'step': 2190, 'epoch': 1} {'type': 'loss', 'content': 0.15524011850357056, 'timestamp': '2025-10-01 04:18:38.867142', 'step': 2191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:38.948825', 'step': 2191, 'epoch': 1} {'type': 'loss', 'content': 0.15262024104595184, 'timestamp': '2025-10-01 04:18:38.978411', 'step': 2192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:39.054155', 'step': 2192, 'epoch': 1} {'type': 'loss', 'content': 0.15305282175540924, 'timestamp': '2025-10-01 04:18:39.057927', 'step': 2193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:39.153674', 'step': 2193, 'epoch': 1} {'type': 'loss', 'content': 0.1864829808473587, 'timestamp': '2025-10-01 04:18:39.155718', 'step': 2194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:39.209589', 'step': 2194, 'epoch': 1} {'type': 'loss', 'content': 0.1911633461713791, 'timestamp': '2025-10-01 04:18:39.211666', 'step': 2195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:39.264692', 'step': 2195, 'epoch': 1} {'type': 'loss', 'content': 0.18526898324489594, 'timestamp': '2025-10-01 04:18:39.270428', 'step': 2196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:39.322966', 'step': 2196, 'epoch': 1} {'type': 'loss', 'content': 0.14105850458145142, 'timestamp': '2025-10-01 04:18:39.325049', 'step': 2197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:39.377810', 'step': 2197, 'epoch': 1} {'type': 'loss', 'content': 0.09119873493909836, 'timestamp': '2025-10-01 04:18:39.379664', 'step': 2198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:39.432656', 'step': 2198, 'epoch': 1} {'type': 'loss', 'content': 0.1862051635980606, 'timestamp': '2025-10-01 04:18:39.434890', 'step': 2199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:18:39.488190', 'step': 2199, 'epoch': 1} {'type': 'loss', 'content': 0.24549500644207, 'timestamp': '2025-10-01 04:18:39.498663', 'step': 2200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:39.550752', 'step': 2200, 'epoch': 1} {'type': 'loss', 'content': 0.13223928213119507, 'timestamp': '2025-10-01 04:18:39.552849', 'step': 2201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:18:39.605758', 'step': 2201, 'epoch': 1} {'type': 'loss', 'content': 0.2357455939054489, 'timestamp': '2025-10-01 04:18:39.607827', 'step': 2202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:39.660485', 'step': 2202, 'epoch': 1} {'type': 'loss', 'content': 0.17727002501487732, 'timestamp': '2025-10-01 04:18:39.662626', 'step': 2203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:39.716374', 'step': 2203, 'epoch': 1} {'type': 'loss', 'content': 0.10850498080253601, 'timestamp': '2025-10-01 04:18:39.721982', 'step': 2204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:39.775703', 'step': 2204, 'epoch': 1} {'type': 'loss', 'content': 0.13068290054798126, 'timestamp': '2025-10-01 04:18:39.778354', 'step': 2205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:39.831723', 'step': 2205, 'epoch': 1} {'type': 'loss', 'content': 0.26928526163101196, 'timestamp': '2025-10-01 04:18:39.833750', 'step': 2206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:39.888811', 'step': 2206, 'epoch': 1} {'type': 'loss', 'content': 0.17074809968471527, 'timestamp': '2025-10-01 04:18:39.890917', 'step': 2207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:18:39.943992', 'step': 2207, 'epoch': 1} {'type': 'loss', 'content': 0.235841304063797, 'timestamp': '2025-10-01 04:18:39.949741', 'step': 2208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:40.003132', 'step': 2208, 'epoch': 1} {'type': 'loss', 'content': 0.14358720183372498, 'timestamp': '2025-10-01 04:18:40.010294', 'step': 2209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:40.078463', 'step': 2209, 'epoch': 1} {'type': 'loss', 'content': 0.1908918172121048, 'timestamp': '2025-10-01 04:18:40.080494', 'step': 2210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:40.134283', 'step': 2210, 'epoch': 1} {'type': 'loss', 'content': 0.32051295042037964, 'timestamp': '2025-10-01 04:18:40.140532', 'step': 2211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:40.204276', 'step': 2211, 'epoch': 1} {'type': 'loss', 'content': 0.33049121499061584, 'timestamp': '2025-10-01 04:18:40.209556', 'step': 2212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:40.262156', 'step': 2212, 'epoch': 1} {'type': 'loss', 'content': 0.06982509046792984, 'timestamp': '2025-10-01 04:18:40.264557', 'step': 2213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:40.318908', 'step': 2213, 'epoch': 1} {'type': 'loss', 'content': 0.21965162456035614, 'timestamp': '2025-10-01 04:18:40.321008', 'step': 2214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:18:40.374826', 'step': 2214, 'epoch': 1} {'type': 'loss', 'content': 0.2037159949541092, 'timestamp': '2025-10-01 04:18:40.377109', 'step': 2215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:40.432358', 'step': 2215, 'epoch': 1} {'type': 'loss', 'content': 0.23882287740707397, 'timestamp': '2025-10-01 04:18:40.437906', 'step': 2216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:40.491312', 'step': 2216, 'epoch': 1} {'type': 'loss', 'content': 0.10706187784671783, 'timestamp': '2025-10-01 04:18:40.493227', 'step': 2217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:40.545986', 'step': 2217, 'epoch': 1} {'type': 'loss', 'content': 0.2629019618034363, 'timestamp': '2025-10-01 04:18:40.550469', 'step': 2218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:40.603811', 'step': 2218, 'epoch': 1} {'type': 'loss', 'content': 0.15004855394363403, 'timestamp': '2025-10-01 04:18:40.606625', 'step': 2219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:18:40.660550', 'step': 2219, 'epoch': 1} {'type': 'loss', 'content': 0.16229042410850525, 'timestamp': '2025-10-01 04:18:40.666215', 'step': 2220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:40.719213', 'step': 2220, 'epoch': 1} {'type': 'loss', 'content': 0.22395667433738708, 'timestamp': '2025-10-01 04:18:40.721815', 'step': 2221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:40.774816', 'step': 2221, 'epoch': 1} {'type': 'loss', 'content': 0.14492462575435638, 'timestamp': '2025-10-01 04:18:40.777626', 'step': 2222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:40.830099', 'step': 2222, 'epoch': 1} {'type': 'loss', 'content': 0.2545093894004822, 'timestamp': '2025-10-01 04:18:40.832354', 'step': 2223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:40.885815', 'step': 2223, 'epoch': 1} {'type': 'loss', 'content': 0.2114560455083847, 'timestamp': '2025-10-01 04:18:40.891240', 'step': 2224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:40.943506', 'step': 2224, 'epoch': 1} {'type': 'loss', 'content': 0.17226193845272064, 'timestamp': '2025-10-01 04:18:40.946100', 'step': 2225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:40.999227', 'step': 2225, 'epoch': 1} {'type': 'loss', 'content': 0.23918622732162476, 'timestamp': '2025-10-01 04:18:41.001391', 'step': 2226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:41.056075', 'step': 2226, 'epoch': 1} {'type': 'loss', 'content': 0.2258254885673523, 'timestamp': '2025-10-01 04:18:41.058257', 'step': 2227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:18:41.113465', 'step': 2227, 'epoch': 1} {'type': 'loss', 'content': 0.139154314994812, 'timestamp': '2025-10-01 04:18:41.120005', 'step': 2228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:18:41.176525', 'step': 2228, 'epoch': 1} {'type': 'loss', 'content': 0.15862451493740082, 'timestamp': '2025-10-01 04:18:41.178684', 'step': 2229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:41.245831', 'step': 2229, 'epoch': 1} {'type': 'loss', 'content': 0.22850367426872253, 'timestamp': '2025-10-01 04:18:41.250852', 'step': 2230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:41.306871', 'step': 2230, 'epoch': 1} {'type': 'loss', 'content': 0.13242307305335999, 'timestamp': '2025-10-01 04:18:41.309020', 'step': 2231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:41.367674', 'step': 2231, 'epoch': 1} {'type': 'loss', 'content': 0.16899892687797546, 'timestamp': '2025-10-01 04:18:41.373138', 'step': 2232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:41.426070', 'step': 2232, 'epoch': 1} {'type': 'loss', 'content': 0.09681975096464157, 'timestamp': '2025-10-01 04:18:41.430639', 'step': 2233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:41.486271', 'step': 2233, 'epoch': 1} {'type': 'loss', 'content': 0.19235831499099731, 'timestamp': '2025-10-01 04:18:41.488331', 'step': 2234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:41.541589', 'step': 2234, 'epoch': 1} {'type': 'loss', 'content': 0.1932220607995987, 'timestamp': '2025-10-01 04:18:41.544534', 'step': 2235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:41.599104', 'step': 2235, 'epoch': 1} {'type': 'loss', 'content': 0.18262574076652527, 'timestamp': '2025-10-01 04:18:41.614775', 'step': 2236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:41.673809', 'step': 2236, 'epoch': 1} {'type': 'loss', 'content': 0.36367926001548767, 'timestamp': '2025-10-01 04:18:41.679372', 'step': 2237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:41.732727', 'step': 2237, 'epoch': 1} {'type': 'loss', 'content': 0.13960476219654083, 'timestamp': '2025-10-01 04:18:41.734828', 'step': 2238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:41.788123', 'step': 2238, 'epoch': 1} {'type': 'loss', 'content': 0.1888500154018402, 'timestamp': '2025-10-01 04:18:41.790066', 'step': 2239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:41.843359', 'step': 2239, 'epoch': 1} {'type': 'loss', 'content': 0.11597327142953873, 'timestamp': '2025-10-01 04:18:41.848852', 'step': 2240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:41.901159', 'step': 2240, 'epoch': 1} {'type': 'loss', 'content': 0.199765145778656, 'timestamp': '2025-10-01 04:18:41.903278', 'step': 2241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:41.959214', 'step': 2241, 'epoch': 1} {'type': 'loss', 'content': 0.1963893175125122, 'timestamp': '2025-10-01 04:18:41.962784', 'step': 2242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:18:42.018827', 'step': 2242, 'epoch': 1} {'type': 'loss', 'content': 0.22866502404212952, 'timestamp': '2025-10-01 04:18:42.020883', 'step': 2243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:18:42.074268', 'step': 2243, 'epoch': 1} {'type': 'loss', 'content': 0.10612182319164276, 'timestamp': '2025-10-01 04:18:42.080127', 'step': 2244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:42.135651', 'step': 2244, 'epoch': 1} {'type': 'loss', 'content': 0.24887333810329437, 'timestamp': '2025-10-01 04:18:42.137645', 'step': 2245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:18:42.190770', 'step': 2245, 'epoch': 1} {'type': 'loss', 'content': 0.18061858415603638, 'timestamp': '2025-10-01 04:18:42.192859', 'step': 2246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:18:42.260507', 'step': 2246, 'epoch': 1} {'type': 'loss', 'content': 0.23295623064041138, 'timestamp': '2025-10-01 04:18:42.265330', 'step': 2247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:18:42.329713', 'step': 2247, 'epoch': 1} {'type': 'loss', 'content': 0.1638868898153305, 'timestamp': '2025-10-01 04:18:42.344444', 'step': 2248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:42.408241', 'step': 2248, 'epoch': 1} {'type': 'loss', 'content': 0.17640750110149384, 'timestamp': '2025-10-01 04:18:42.410282', 'step': 2249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:18:42.468813', 'step': 2249, 'epoch': 1} {'type': 'loss', 'content': 0.18980352580547333, 'timestamp': '2025-10-01 04:18:42.472113', 'step': 2250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:18:42.526928', 'step': 2250, 'epoch': 1} {'type': 'loss', 'content': 0.14390553534030914, 'timestamp': '2025-10-01 04:18:42.529053', 'step': 2251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:42.587479', 'step': 2251, 'epoch': 1} {'type': 'loss', 'content': 0.1963210105895996, 'timestamp': '2025-10-01 04:18:42.593358', 'step': 2252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:42.653103', 'step': 2252, 'epoch': 1} {'type': 'loss', 'content': 0.18757793307304382, 'timestamp': '2025-10-01 04:18:42.655188', 'step': 2253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:42.708619', 'step': 2253, 'epoch': 1} {'type': 'loss', 'content': 0.1517532914876938, 'timestamp': '2025-10-01 04:18:42.710779', 'step': 2254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:18:42.770395', 'step': 2254, 'epoch': 1} {'type': 'loss', 'content': 0.19264547526836395, 'timestamp': '2025-10-01 04:18:42.772823', 'step': 2255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:42.825930', 'step': 2255, 'epoch': 1} {'type': 'loss', 'content': 0.16643574833869934, 'timestamp': '2025-10-01 04:18:42.831652', 'step': 2256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:42.884759', 'step': 2256, 'epoch': 1} {'type': 'loss', 'content': 0.20928195118904114, 'timestamp': '2025-10-01 04:18:42.887275', 'step': 2257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:42.954110', 'step': 2257, 'epoch': 1} {'type': 'loss', 'content': 0.11337877064943314, 'timestamp': '2025-10-01 04:18:42.956226', 'step': 2258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:43.012161', 'step': 2258, 'epoch': 1} {'type': 'loss', 'content': 0.2587517201900482, 'timestamp': '2025-10-01 04:18:43.014608', 'step': 2259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:43.080577', 'step': 2259, 'epoch': 1} {'type': 'loss', 'content': 0.1876661777496338, 'timestamp': '2025-10-01 04:18:43.086296', 'step': 2260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:43.139122', 'step': 2260, 'epoch': 1} {'type': 'loss', 'content': 0.18724492192268372, 'timestamp': '2025-10-01 04:18:43.141196', 'step': 2261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:43.210399', 'step': 2261, 'epoch': 1} {'type': 'loss', 'content': 0.22781209647655487, 'timestamp': '2025-10-01 04:18:43.216482', 'step': 2262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:43.284790', 'step': 2262, 'epoch': 1} {'type': 'loss', 'content': 0.1813139170408249, 'timestamp': '2025-10-01 04:18:43.290281', 'step': 2263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:43.344447', 'step': 2263, 'epoch': 1} {'type': 'loss', 'content': 0.15090663731098175, 'timestamp': '2025-10-01 04:18:43.353173', 'step': 2264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:43.413261', 'step': 2264, 'epoch': 1} {'type': 'loss', 'content': 0.15048830211162567, 'timestamp': '2025-10-01 04:18:43.415399', 'step': 2265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:43.471555', 'step': 2265, 'epoch': 1} {'type': 'loss', 'content': 0.15878911316394806, 'timestamp': '2025-10-01 04:18:43.473571', 'step': 2266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:43.540745', 'step': 2266, 'epoch': 1} {'type': 'loss', 'content': 0.2036096602678299, 'timestamp': '2025-10-01 04:18:43.542774', 'step': 2267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:43.596926', 'step': 2267, 'epoch': 1} {'type': 'loss', 'content': 0.14127743244171143, 'timestamp': '2025-10-01 04:18:43.604027', 'step': 2268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:43.665922', 'step': 2268, 'epoch': 1} {'type': 'loss', 'content': 0.2877334654331207, 'timestamp': '2025-10-01 04:18:43.680213', 'step': 2269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:43.735437', 'step': 2269, 'epoch': 1} {'type': 'loss', 'content': 0.22785824537277222, 'timestamp': '2025-10-01 04:18:43.744425', 'step': 2270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:43.799584', 'step': 2270, 'epoch': 1} {'type': 'loss', 'content': 0.16397874057292938, 'timestamp': '2025-10-01 04:18:43.820957', 'step': 2271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:43.890803', 'step': 2271, 'epoch': 1} {'type': 'loss', 'content': 0.1467612385749817, 'timestamp': '2025-10-01 04:18:43.910213', 'step': 2272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:43.962907', 'step': 2272, 'epoch': 1} {'type': 'loss', 'content': 0.2579438090324402, 'timestamp': '2025-10-01 04:18:43.964845', 'step': 2273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:44.018794', 'step': 2273, 'epoch': 1} {'type': 'loss', 'content': 0.14781324565410614, 'timestamp': '2025-10-01 04:18:44.023198', 'step': 2274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:44.081461', 'step': 2274, 'epoch': 1} {'type': 'loss', 'content': 0.2124074399471283, 'timestamp': '2025-10-01 04:18:44.091747', 'step': 2275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:44.147657', 'step': 2275, 'epoch': 1} {'type': 'loss', 'content': 0.2169298529624939, 'timestamp': '2025-10-01 04:18:44.154243', 'step': 2276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:44.209228', 'step': 2276, 'epoch': 1} {'type': 'loss', 'content': 0.1996145397424698, 'timestamp': '2025-10-01 04:18:44.212781', 'step': 2277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:44.266575', 'step': 2277, 'epoch': 1} {'type': 'loss', 'content': 0.23165081441402435, 'timestamp': '2025-10-01 04:18:44.270436', 'step': 2278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:44.342181', 'step': 2278, 'epoch': 1} {'type': 'loss', 'content': 0.2481713593006134, 'timestamp': '2025-10-01 04:18:44.345386', 'step': 2279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:44.404545', 'step': 2279, 'epoch': 1} {'type': 'loss', 'content': 0.20145931839942932, 'timestamp': '2025-10-01 04:18:44.415878', 'step': 2280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:44.470135', 'step': 2280, 'epoch': 1} {'type': 'loss', 'content': 0.2264731526374817, 'timestamp': '2025-10-01 04:18:44.473067', 'step': 2281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:44.530933', 'step': 2281, 'epoch': 1} {'type': 'loss', 'content': 0.2013983428478241, 'timestamp': '2025-10-01 04:18:44.541559', 'step': 2282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:44.595373', 'step': 2282, 'epoch': 1} {'type': 'loss', 'content': 0.17177799344062805, 'timestamp': '2025-10-01 04:18:44.598147', 'step': 2283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:44.653894', 'step': 2283, 'epoch': 1} {'type': 'loss', 'content': 0.22673636674880981, 'timestamp': '2025-10-01 04:18:44.660519', 'step': 2284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:44.715670', 'step': 2284, 'epoch': 1} {'type': 'loss', 'content': 0.123360276222229, 'timestamp': '2025-10-01 04:18:44.718117', 'step': 2285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:18:44.773185', 'step': 2285, 'epoch': 1} {'type': 'loss', 'content': 0.1023377850651741, 'timestamp': '2025-10-01 04:18:44.775433', 'step': 2286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:44.833742', 'step': 2286, 'epoch': 1} {'type': 'loss', 'content': 0.14477261900901794, 'timestamp': '2025-10-01 04:18:44.836113', 'step': 2287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:44.894196', 'step': 2287, 'epoch': 1} {'type': 'loss', 'content': 0.18161432445049286, 'timestamp': '2025-10-01 04:18:44.901505', 'step': 2288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:18:44.958110', 'step': 2288, 'epoch': 1} {'type': 'loss', 'content': 0.2507985234260559, 'timestamp': '2025-10-01 04:18:44.969011', 'step': 2289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:18:45.024693', 'step': 2289, 'epoch': 1} {'type': 'loss', 'content': 0.21211618185043335, 'timestamp': '2025-10-01 04:18:45.026992', 'step': 2290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:45.081608', 'step': 2290, 'epoch': 1} {'type': 'loss', 'content': 0.2661093473434448, 'timestamp': '2025-10-01 04:18:45.083907', 'step': 2291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:45.146801', 'step': 2291, 'epoch': 1} {'type': 'loss', 'content': 0.16357865929603577, 'timestamp': '2025-10-01 04:18:45.153422', 'step': 2292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:45.215196', 'step': 2292, 'epoch': 1} {'type': 'loss', 'content': 0.18770021200180054, 'timestamp': '2025-10-01 04:18:45.217233', 'step': 2293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:45.271372', 'step': 2293, 'epoch': 1} {'type': 'loss', 'content': 0.19478720426559448, 'timestamp': '2025-10-01 04:18:45.273870', 'step': 2294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:45.327941', 'step': 2294, 'epoch': 1} {'type': 'loss', 'content': 0.24191491305828094, 'timestamp': '2025-10-01 04:18:45.329873', 'step': 2295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:45.385129', 'step': 2295, 'epoch': 1} {'type': 'loss', 'content': 0.1440662145614624, 'timestamp': '2025-10-01 04:18:45.391361', 'step': 2296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:45.444905', 'step': 2296, 'epoch': 1} {'type': 'loss', 'content': 0.1989096999168396, 'timestamp': '2025-10-01 04:18:45.446909', 'step': 2297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:18:45.500658', 'step': 2297, 'epoch': 1} {'type': 'loss', 'content': 0.12141094356775284, 'timestamp': '2025-10-01 04:18:45.503019', 'step': 2298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:45.559353', 'step': 2298, 'epoch': 1} {'type': 'loss', 'content': 0.17903953790664673, 'timestamp': '2025-10-01 04:18:45.561569', 'step': 2299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:45.615390', 'step': 2299, 'epoch': 1} {'type': 'loss', 'content': 0.2031688243150711, 'timestamp': '2025-10-01 04:18:45.621944', 'step': 2300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:45.675832', 'step': 2300, 'epoch': 1} {'type': 'loss', 'content': 0.15091128647327423, 'timestamp': '2025-10-01 04:18:45.677872', 'step': 2301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:45.731600', 'step': 2301, 'epoch': 1} {'type': 'loss', 'content': 0.17648683488368988, 'timestamp': '2025-10-01 04:18:45.733691', 'step': 2302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:45.787558', 'step': 2302, 'epoch': 1} {'type': 'loss', 'content': 0.2350873500108719, 'timestamp': '2025-10-01 04:18:45.791934', 'step': 2303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:45.849630', 'step': 2303, 'epoch': 1} {'type': 'loss', 'content': 0.17533913254737854, 'timestamp': '2025-10-01 04:18:45.858766', 'step': 2304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:45.920997', 'step': 2304, 'epoch': 1} {'type': 'loss', 'content': 0.18633365631103516, 'timestamp': '2025-10-01 04:18:45.923150', 'step': 2305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:45.978791', 'step': 2305, 'epoch': 1} {'type': 'loss', 'content': 0.2319074124097824, 'timestamp': '2025-10-01 04:18:45.981325', 'step': 2306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:46.040775', 'step': 2306, 'epoch': 1} {'type': 'loss', 'content': 0.14295358955860138, 'timestamp': '2025-10-01 04:18:46.042969', 'step': 2307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:46.104645', 'step': 2307, 'epoch': 1} {'type': 'loss', 'content': 0.3205782175064087, 'timestamp': '2025-10-01 04:18:46.110692', 'step': 2308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:18:46.164635', 'step': 2308, 'epoch': 1} {'type': 'loss', 'content': 0.22810620069503784, 'timestamp': '2025-10-01 04:18:46.167901', 'step': 2309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:46.230222', 'step': 2309, 'epoch': 1} {'type': 'loss', 'content': 0.21263284981250763, 'timestamp': '2025-10-01 04:18:46.232662', 'step': 2310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:46.287534', 'step': 2310, 'epoch': 1} {'type': 'loss', 'content': 0.11298955976963043, 'timestamp': '2025-10-01 04:18:46.289481', 'step': 2311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:46.343737', 'step': 2311, 'epoch': 1} {'type': 'loss', 'content': 0.17038306593894958, 'timestamp': '2025-10-01 04:18:46.349836', 'step': 2312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:18:46.404199', 'step': 2312, 'epoch': 1} {'type': 'loss', 'content': 0.13932397961616516, 'timestamp': '2025-10-01 04:18:46.406668', 'step': 2313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:46.460687', 'step': 2313, 'epoch': 1} {'type': 'loss', 'content': 0.12587587535381317, 'timestamp': '2025-10-01 04:18:46.464109', 'step': 2314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:18:46.519178', 'step': 2314, 'epoch': 1} {'type': 'loss', 'content': 0.14356009662151337, 'timestamp': '2025-10-01 04:18:46.521329', 'step': 2315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:18:46.577120', 'step': 2315, 'epoch': 1} {'type': 'loss', 'content': 0.11936955153942108, 'timestamp': '2025-10-01 04:18:46.592916', 'step': 2316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:18:46.646355', 'step': 2316, 'epoch': 1} {'type': 'loss', 'content': 0.13238492608070374, 'timestamp': '2025-10-01 04:18:46.648569', 'step': 2317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:46.702590', 'step': 2317, 'epoch': 1} {'type': 'loss', 'content': 0.11148768663406372, 'timestamp': '2025-10-01 04:18:46.704840', 'step': 2318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:46.758658', 'step': 2318, 'epoch': 1} {'type': 'loss', 'content': 0.178654283285141, 'timestamp': '2025-10-01 04:18:46.761268', 'step': 2319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:18:46.816421', 'step': 2319, 'epoch': 1} {'type': 'loss', 'content': 0.14599846303462982, 'timestamp': '2025-10-01 04:18:46.823024', 'step': 2320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:46.881683', 'step': 2320, 'epoch': 1} {'type': 'loss', 'content': 0.2592432200908661, 'timestamp': '2025-10-01 04:18:46.884281', 'step': 2321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:46.938561', 'step': 2321, 'epoch': 1} {'type': 'loss', 'content': 0.1740133911371231, 'timestamp': '2025-10-01 04:18:46.941473', 'step': 2322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:46.996166', 'step': 2322, 'epoch': 1} {'type': 'loss', 'content': 0.1638365089893341, 'timestamp': '2025-10-01 04:18:46.999273', 'step': 2323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:47.054664', 'step': 2323, 'epoch': 1} {'type': 'loss', 'content': 0.20843364298343658, 'timestamp': '2025-10-01 04:18:47.060637', 'step': 2324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:18:47.113988', 'step': 2324, 'epoch': 1} {'type': 'loss', 'content': 0.22995196282863617, 'timestamp': '2025-10-01 04:18:47.116926', 'step': 2325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:47.171676', 'step': 2325, 'epoch': 1} {'type': 'loss', 'content': 0.11067598313093185, 'timestamp': '2025-10-01 04:18:47.174070', 'step': 2326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:47.238709', 'step': 2326, 'epoch': 1} {'type': 'loss', 'content': 0.1769157350063324, 'timestamp': '2025-10-01 04:18:47.240881', 'step': 2327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:18:47.296476', 'step': 2327, 'epoch': 1} {'type': 'loss', 'content': 0.12315062433481216, 'timestamp': '2025-10-01 04:18:47.302498', 'step': 2328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:18:47.357105', 'step': 2328, 'epoch': 1} {'type': 'loss', 'content': 0.1608298271894455, 'timestamp': '2025-10-01 04:18:47.359518', 'step': 2329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:18:47.413895', 'step': 2329, 'epoch': 1} {'type': 'loss', 'content': 0.15296541154384613, 'timestamp': '2025-10-01 04:18:47.415869', 'step': 2330, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-01 04:19:01.205097', 'step': 2330, 'epoch': 1} {'type': 'pplx', 'content': 9549.185378805976, 'timestamp': '2025-10-01 04:19:01.208646', 'step': 2330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:01.264124', 'step': 2330, 'epoch': 1} {'type': 'loss', 'content': 0.1675727665424347, 'timestamp': '2025-10-01 04:19:01.266138', 'step': 2331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:01.319770', 'step': 2331, 'epoch': 1} {'type': 'loss', 'content': 0.23048751056194305, 'timestamp': '2025-10-01 04:19:01.325748', 'step': 2332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:01.378832', 'step': 2332, 'epoch': 1} {'type': 'loss', 'content': 0.16013626754283905, 'timestamp': '2025-10-01 04:19:01.380812', 'step': 2333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:19:01.444376', 'step': 2333, 'epoch': 1} {'type': 'loss', 'content': 0.22381450235843658, 'timestamp': '2025-10-01 04:19:01.446451', 'step': 2334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:01.502917', 'step': 2334, 'epoch': 1} {'type': 'loss', 'content': 0.15302592515945435, 'timestamp': '2025-10-01 04:19:01.505036', 'step': 2335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:01.571324', 'step': 2335, 'epoch': 1} {'type': 'loss', 'content': 0.18069612979888916, 'timestamp': '2025-10-01 04:19:01.577476', 'step': 2336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:01.632272', 'step': 2336, 'epoch': 1} {'type': 'loss', 'content': 0.16022153198719025, 'timestamp': '2025-10-01 04:19:01.634234', 'step': 2337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:01.686665', 'step': 2337, 'epoch': 1} {'type': 'loss', 'content': 0.12027806043624878, 'timestamp': '2025-10-01 04:19:01.688890', 'step': 2338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:01.741480', 'step': 2338, 'epoch': 1} {'type': 'loss', 'content': 0.13409240543842316, 'timestamp': '2025-10-01 04:19:01.743653', 'step': 2339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:01.796628', 'step': 2339, 'epoch': 1} {'type': 'loss', 'content': 0.161576509475708, 'timestamp': '2025-10-01 04:19:01.802395', 'step': 2340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:01.855083', 'step': 2340, 'epoch': 1} {'type': 'loss', 'content': 0.21208244562149048, 'timestamp': '2025-10-01 04:19:01.857032', 'step': 2341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:01.911412', 'step': 2341, 'epoch': 1} {'type': 'loss', 'content': 0.15818721055984497, 'timestamp': '2025-10-01 04:19:01.913822', 'step': 2342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:01.967331', 'step': 2342, 'epoch': 1} {'type': 'loss', 'content': 0.17450957000255585, 'timestamp': '2025-10-01 04:19:01.969721', 'step': 2343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:02.022668', 'step': 2343, 'epoch': 1} {'type': 'loss', 'content': 0.14349499344825745, 'timestamp': '2025-10-01 04:19:02.028822', 'step': 2344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:02.081209', 'step': 2344, 'epoch': 1} {'type': 'loss', 'content': 0.22360776364803314, 'timestamp': '2025-10-01 04:19:02.083453', 'step': 2345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:02.135704', 'step': 2345, 'epoch': 1} {'type': 'loss', 'content': 0.17581887543201447, 'timestamp': '2025-10-01 04:19:02.138175', 'step': 2346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:02.191593', 'step': 2346, 'epoch': 1} {'type': 'loss', 'content': 0.15492123365402222, 'timestamp': '2025-10-01 04:19:02.193961', 'step': 2347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:02.256492', 'step': 2347, 'epoch': 1} {'type': 'loss', 'content': 0.1535673439502716, 'timestamp': '2025-10-01 04:19:02.262402', 'step': 2348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:02.314986', 'step': 2348, 'epoch': 1} {'type': 'loss', 'content': 0.18324923515319824, 'timestamp': '2025-10-01 04:19:02.317217', 'step': 2349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:02.370064', 'step': 2349, 'epoch': 1} {'type': 'loss', 'content': 0.18083874881267548, 'timestamp': '2025-10-01 04:19:02.372373', 'step': 2350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:02.427075', 'step': 2350, 'epoch': 1} {'type': 'loss', 'content': 0.2748125195503235, 'timestamp': '2025-10-01 04:19:02.429654', 'step': 2351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:02.482264', 'step': 2351, 'epoch': 1} {'type': 'loss', 'content': 0.07398665696382523, 'timestamp': '2025-10-01 04:19:02.488005', 'step': 2352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:02.540822', 'step': 2352, 'epoch': 1} {'type': 'loss', 'content': 0.1575181633234024, 'timestamp': '2025-10-01 04:19:02.543124', 'step': 2353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:19:02.595807', 'step': 2353, 'epoch': 1} {'type': 'loss', 'content': 0.10013186931610107, 'timestamp': '2025-10-01 04:19:02.598090', 'step': 2354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:02.651322', 'step': 2354, 'epoch': 1} {'type': 'loss', 'content': 0.2501397728919983, 'timestamp': '2025-10-01 04:19:02.657836', 'step': 2355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:02.710280', 'step': 2355, 'epoch': 1} {'type': 'loss', 'content': 0.1686849147081375, 'timestamp': '2025-10-01 04:19:02.717193', 'step': 2356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:02.769943', 'step': 2356, 'epoch': 1} {'type': 'loss', 'content': 0.17606109380722046, 'timestamp': '2025-10-01 04:19:02.773679', 'step': 2357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:02.826144', 'step': 2357, 'epoch': 1} {'type': 'loss', 'content': 0.12612690031528473, 'timestamp': '2025-10-01 04:19:02.828457', 'step': 2358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:19:02.882019', 'step': 2358, 'epoch': 1} {'type': 'loss', 'content': 0.1768704503774643, 'timestamp': '2025-10-01 04:19:02.884191', 'step': 2359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:02.938458', 'step': 2359, 'epoch': 1} {'type': 'loss', 'content': 0.21424037218093872, 'timestamp': '2025-10-01 04:19:02.944275', 'step': 2360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:02.997281', 'step': 2360, 'epoch': 1} {'type': 'loss', 'content': 0.2576574385166168, 'timestamp': '2025-10-01 04:19:02.999593', 'step': 2361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:03.053045', 'step': 2361, 'epoch': 1} {'type': 'loss', 'content': 0.20247891545295715, 'timestamp': '2025-10-01 04:19:03.055261', 'step': 2362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:03.118671', 'step': 2362, 'epoch': 1} {'type': 'loss', 'content': 0.16433656215667725, 'timestamp': '2025-10-01 04:19:03.120848', 'step': 2363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:19:03.173677', 'step': 2363, 'epoch': 1} {'type': 'loss', 'content': 0.34424251317977905, 'timestamp': '2025-10-01 04:19:03.179568', 'step': 2364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:03.232595', 'step': 2364, 'epoch': 1} {'type': 'loss', 'content': 0.2393120974302292, 'timestamp': '2025-10-01 04:19:03.235259', 'step': 2365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:03.287650', 'step': 2365, 'epoch': 1} {'type': 'loss', 'content': 0.27304527163505554, 'timestamp': '2025-10-01 04:19:03.289895', 'step': 2366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:03.342136', 'step': 2366, 'epoch': 1} {'type': 'loss', 'content': 0.1509179323911667, 'timestamp': '2025-10-01 04:19:03.344367', 'step': 2367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:03.397142', 'step': 2367, 'epoch': 1} {'type': 'loss', 'content': 0.19960348308086395, 'timestamp': '2025-10-01 04:19:03.402949', 'step': 2368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:03.455695', 'step': 2368, 'epoch': 1} {'type': 'loss', 'content': 0.24786895513534546, 'timestamp': '2025-10-01 04:19:03.458652', 'step': 2369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:03.512249', 'step': 2369, 'epoch': 1} {'type': 'loss', 'content': 0.14260230958461761, 'timestamp': '2025-10-01 04:19:03.514530', 'step': 2370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:03.567776', 'step': 2370, 'epoch': 1} {'type': 'loss', 'content': 0.12989450991153717, 'timestamp': '2025-10-01 04:19:03.570278', 'step': 2371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:03.623252', 'step': 2371, 'epoch': 1} {'type': 'loss', 'content': 0.12399667501449585, 'timestamp': '2025-10-01 04:19:03.630987', 'step': 2372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:03.683582', 'step': 2372, 'epoch': 1} {'type': 'loss', 'content': 0.19752182066440582, 'timestamp': '2025-10-01 04:19:03.685960', 'step': 2373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:03.739317', 'step': 2373, 'epoch': 1} {'type': 'loss', 'content': 0.23077574372291565, 'timestamp': '2025-10-01 04:19:03.741573', 'step': 2374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:03.796182', 'step': 2374, 'epoch': 1} {'type': 'loss', 'content': 0.2508200407028198, 'timestamp': '2025-10-01 04:19:03.800120', 'step': 2375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:03.855184', 'step': 2375, 'epoch': 1} {'type': 'loss', 'content': 0.19983220100402832, 'timestamp': '2025-10-01 04:19:03.862318', 'step': 2376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:03.915184', 'step': 2376, 'epoch': 1} {'type': 'loss', 'content': 0.3176816701889038, 'timestamp': '2025-10-01 04:19:03.918694', 'step': 2377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:03.972584', 'step': 2377, 'epoch': 1} {'type': 'loss', 'content': 0.26332712173461914, 'timestamp': '2025-10-01 04:19:03.974954', 'step': 2378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:04.032050', 'step': 2378, 'epoch': 1} {'type': 'loss', 'content': 0.1542721539735794, 'timestamp': '2025-10-01 04:19:04.034479', 'step': 2379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:04.089580', 'step': 2379, 'epoch': 1} {'type': 'loss', 'content': 0.14070077240467072, 'timestamp': '2025-10-01 04:19:04.096026', 'step': 2380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:04.149350', 'step': 2380, 'epoch': 1} {'type': 'loss', 'content': 0.1257544457912445, 'timestamp': '2025-10-01 04:19:04.151712', 'step': 2381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:19:04.203994', 'step': 2381, 'epoch': 1} {'type': 'loss', 'content': 0.23381072282791138, 'timestamp': '2025-10-01 04:19:04.206308', 'step': 2382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:04.259146', 'step': 2382, 'epoch': 1} {'type': 'loss', 'content': 0.19566313922405243, 'timestamp': '2025-10-01 04:19:04.261348', 'step': 2383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:19:04.313649', 'step': 2383, 'epoch': 1} {'type': 'loss', 'content': 0.28659185767173767, 'timestamp': '2025-10-01 04:19:04.319721', 'step': 2384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:04.371842', 'step': 2384, 'epoch': 1} {'type': 'loss', 'content': 0.1986631155014038, 'timestamp': '2025-10-01 04:19:04.382101', 'step': 2385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:04.435153', 'step': 2385, 'epoch': 1} {'type': 'loss', 'content': 0.22263383865356445, 'timestamp': '2025-10-01 04:19:04.437493', 'step': 2386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:04.490832', 'step': 2386, 'epoch': 1} {'type': 'loss', 'content': 0.1671725958585739, 'timestamp': '2025-10-01 04:19:04.493177', 'step': 2387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:04.551745', 'step': 2387, 'epoch': 1} {'type': 'loss', 'content': 0.18215647339820862, 'timestamp': '2025-10-01 04:19:04.557713', 'step': 2388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:04.609831', 'step': 2388, 'epoch': 1} {'type': 'loss', 'content': 0.18525438010692596, 'timestamp': '2025-10-01 04:19:04.612124', 'step': 2389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:04.664865', 'step': 2389, 'epoch': 1} {'type': 'loss', 'content': 0.20229221880435944, 'timestamp': '2025-10-01 04:19:04.666995', 'step': 2390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:19:04.719429', 'step': 2390, 'epoch': 1} {'type': 'loss', 'content': 0.24159370362758636, 'timestamp': '2025-10-01 04:19:04.721723', 'step': 2391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:04.774764', 'step': 2391, 'epoch': 1} {'type': 'loss', 'content': 0.22690095007419586, 'timestamp': '2025-10-01 04:19:04.780876', 'step': 2392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:04.833035', 'step': 2392, 'epoch': 1} {'type': 'loss', 'content': 0.25377991795539856, 'timestamp': '2025-10-01 04:19:04.837388', 'step': 2393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:04.890003', 'step': 2393, 'epoch': 1} {'type': 'loss', 'content': 0.12627068161964417, 'timestamp': '2025-10-01 04:19:04.893448', 'step': 2394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:04.946207', 'step': 2394, 'epoch': 1} {'type': 'loss', 'content': 0.19848096370697021, 'timestamp': '2025-10-01 04:19:04.948452', 'step': 2395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:05.001232', 'step': 2395, 'epoch': 1} {'type': 'loss', 'content': 0.21111896634101868, 'timestamp': '2025-10-01 04:19:05.006977', 'step': 2396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:05.059800', 'step': 2396, 'epoch': 1} {'type': 'loss', 'content': 0.10031837224960327, 'timestamp': '2025-10-01 04:19:05.062185', 'step': 2397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:05.115848', 'step': 2397, 'epoch': 1} {'type': 'loss', 'content': 0.12881211936473846, 'timestamp': '2025-10-01 04:19:05.118106', 'step': 2398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:05.171192', 'step': 2398, 'epoch': 1} {'type': 'loss', 'content': 0.18921220302581787, 'timestamp': '2025-10-01 04:19:05.173449', 'step': 2399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:19:05.225884', 'step': 2399, 'epoch': 1} {'type': 'loss', 'content': 0.14789018034934998, 'timestamp': '2025-10-01 04:19:05.231639', 'step': 2400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:05.299149', 'step': 2400, 'epoch': 1} {'type': 'loss', 'content': 0.1912010908126831, 'timestamp': '2025-10-01 04:19:05.301421', 'step': 2401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:05.354553', 'step': 2401, 'epoch': 1} {'type': 'loss', 'content': 0.1334054172039032, 'timestamp': '2025-10-01 04:19:05.357028', 'step': 2402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:05.409691', 'step': 2402, 'epoch': 1} {'type': 'loss', 'content': 0.1812829077243805, 'timestamp': '2025-10-01 04:19:05.411965', 'step': 2403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:05.465934', 'step': 2403, 'epoch': 1} {'type': 'loss', 'content': 0.13599884510040283, 'timestamp': '2025-10-01 04:19:05.471814', 'step': 2404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:05.537028', 'step': 2404, 'epoch': 1} {'type': 'loss', 'content': 0.1628413200378418, 'timestamp': '2025-10-01 04:19:05.539240', 'step': 2405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:05.593905', 'step': 2405, 'epoch': 1} {'type': 'loss', 'content': 0.18266886472702026, 'timestamp': '2025-10-01 04:19:05.596161', 'step': 2406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:05.649255', 'step': 2406, 'epoch': 1} {'type': 'loss', 'content': 0.1917855441570282, 'timestamp': '2025-10-01 04:19:05.651855', 'step': 2407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:05.712095', 'step': 2407, 'epoch': 1} {'type': 'loss', 'content': 0.12889103591442108, 'timestamp': '2025-10-01 04:19:05.718084', 'step': 2408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:05.771160', 'step': 2408, 'epoch': 1} {'type': 'loss', 'content': 0.21558405458927155, 'timestamp': '2025-10-01 04:19:05.773488', 'step': 2409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:05.828049', 'step': 2409, 'epoch': 1} {'type': 'loss', 'content': 0.22061920166015625, 'timestamp': '2025-10-01 04:19:05.830767', 'step': 2410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:05.886607', 'step': 2410, 'epoch': 1} {'type': 'loss', 'content': 0.2959465980529785, 'timestamp': '2025-10-01 04:19:05.889595', 'step': 2411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:05.943751', 'step': 2411, 'epoch': 1} {'type': 'loss', 'content': 0.2407505214214325, 'timestamp': '2025-10-01 04:19:05.950069', 'step': 2412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:06.004378', 'step': 2412, 'epoch': 1} {'type': 'loss', 'content': 0.2333516925573349, 'timestamp': '2025-10-01 04:19:06.006675', 'step': 2413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:06.060426', 'step': 2413, 'epoch': 1} {'type': 'loss', 'content': 0.2078365534543991, 'timestamp': '2025-10-01 04:19:06.062833', 'step': 2414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:06.116445', 'step': 2414, 'epoch': 1} {'type': 'loss', 'content': 0.239353209733963, 'timestamp': '2025-10-01 04:19:06.119032', 'step': 2415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:19:06.177598', 'step': 2415, 'epoch': 1} {'type': 'loss', 'content': 0.2178974449634552, 'timestamp': '2025-10-01 04:19:06.184081', 'step': 2416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:19:06.238586', 'step': 2416, 'epoch': 1} {'type': 'loss', 'content': 0.15681293606758118, 'timestamp': '2025-10-01 04:19:06.242061', 'step': 2417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:06.295854', 'step': 2417, 'epoch': 1} {'type': 'loss', 'content': 0.26980555057525635, 'timestamp': '2025-10-01 04:19:06.298017', 'step': 2418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:06.353036', 'step': 2418, 'epoch': 1} {'type': 'loss', 'content': 0.1920596808195114, 'timestamp': '2025-10-01 04:19:06.354847', 'step': 2419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:06.407630', 'step': 2419, 'epoch': 1} {'type': 'loss', 'content': 0.1840853989124298, 'timestamp': '2025-10-01 04:19:06.414354', 'step': 2420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:06.481474', 'step': 2420, 'epoch': 1} {'type': 'loss', 'content': 0.17159149050712585, 'timestamp': '2025-10-01 04:19:06.483994', 'step': 2421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:06.536621', 'step': 2421, 'epoch': 1} {'type': 'loss', 'content': 0.24663753807544708, 'timestamp': '2025-10-01 04:19:06.538851', 'step': 2422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:06.592116', 'step': 2422, 'epoch': 1} {'type': 'loss', 'content': 0.19351887702941895, 'timestamp': '2025-10-01 04:19:06.594546', 'step': 2423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:06.647973', 'step': 2423, 'epoch': 1} {'type': 'loss', 'content': 0.1285635083913803, 'timestamp': '2025-10-01 04:19:06.654166', 'step': 2424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:06.707193', 'step': 2424, 'epoch': 1} {'type': 'loss', 'content': 0.18434356153011322, 'timestamp': '2025-10-01 04:19:06.709360', 'step': 2425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:06.762231', 'step': 2425, 'epoch': 1} {'type': 'loss', 'content': 0.1967126876115799, 'timestamp': '2025-10-01 04:19:06.764599', 'step': 2426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:06.817702', 'step': 2426, 'epoch': 1} {'type': 'loss', 'content': 0.16593779623508453, 'timestamp': '2025-10-01 04:19:06.820008', 'step': 2427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:06.872738', 'step': 2427, 'epoch': 1} {'type': 'loss', 'content': 0.2598157525062561, 'timestamp': '2025-10-01 04:19:06.878462', 'step': 2428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:06.942307', 'step': 2428, 'epoch': 1} {'type': 'loss', 'content': 0.12159205973148346, 'timestamp': '2025-10-01 04:19:06.950885', 'step': 2429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:19:07.003900', 'step': 2429, 'epoch': 1} {'type': 'loss', 'content': 0.12683941423892975, 'timestamp': '2025-10-01 04:19:07.007093', 'step': 2430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:07.072483', 'step': 2430, 'epoch': 1} {'type': 'loss', 'content': 0.21353019773960114, 'timestamp': '2025-10-01 04:19:07.075170', 'step': 2431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:07.127923', 'step': 2431, 'epoch': 1} {'type': 'loss', 'content': 0.18434350192546844, 'timestamp': '2025-10-01 04:19:07.134269', 'step': 2432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:07.186161', 'step': 2432, 'epoch': 1} {'type': 'loss', 'content': 0.18821369111537933, 'timestamp': '2025-10-01 04:19:07.188157', 'step': 2433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:07.249478', 'step': 2433, 'epoch': 1} {'type': 'loss', 'content': 0.12061912566423416, 'timestamp': '2025-10-01 04:19:07.251709', 'step': 2434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:07.304162', 'step': 2434, 'epoch': 1} {'type': 'loss', 'content': 0.23803381621837616, 'timestamp': '2025-10-01 04:19:07.306457', 'step': 2435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:07.359352', 'step': 2435, 'epoch': 1} {'type': 'loss', 'content': 0.10958526283502579, 'timestamp': '2025-10-01 04:19:07.365493', 'step': 2436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:07.418033', 'step': 2436, 'epoch': 1} {'type': 'loss', 'content': 0.1449459344148636, 'timestamp': '2025-10-01 04:19:07.420299', 'step': 2437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:07.473183', 'step': 2437, 'epoch': 1} {'type': 'loss', 'content': 0.14882075786590576, 'timestamp': '2025-10-01 04:19:07.479569', 'step': 2438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:07.534033', 'step': 2438, 'epoch': 1} {'type': 'loss', 'content': 0.14052410423755646, 'timestamp': '2025-10-01 04:19:07.535985', 'step': 2439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:07.589264', 'step': 2439, 'epoch': 1} {'type': 'loss', 'content': 0.17607541382312775, 'timestamp': '2025-10-01 04:19:07.595391', 'step': 2440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:07.648546', 'step': 2440, 'epoch': 1} {'type': 'loss', 'content': 0.29775646328926086, 'timestamp': '2025-10-01 04:19:07.653660', 'step': 2441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:07.710966', 'step': 2441, 'epoch': 1} {'type': 'loss', 'content': 0.21652497351169586, 'timestamp': '2025-10-01 04:19:07.717089', 'step': 2442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:19:07.770161', 'step': 2442, 'epoch': 1} {'type': 'loss', 'content': 0.1881939172744751, 'timestamp': '2025-10-01 04:19:07.772412', 'step': 2443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:07.829940', 'step': 2443, 'epoch': 1} {'type': 'loss', 'content': 0.27111074328422546, 'timestamp': '2025-10-01 04:19:07.844495', 'step': 2444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:19:07.900461', 'step': 2444, 'epoch': 1} {'type': 'loss', 'content': 0.08211329579353333, 'timestamp': '2025-10-01 04:19:07.902824', 'step': 2445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:07.959781', 'step': 2445, 'epoch': 1} {'type': 'loss', 'content': 0.2122754156589508, 'timestamp': '2025-10-01 04:19:07.962008', 'step': 2446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:08.015091', 'step': 2446, 'epoch': 1} {'type': 'loss', 'content': 0.13262757658958435, 'timestamp': '2025-10-01 04:19:08.017420', 'step': 2447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:08.071393', 'step': 2447, 'epoch': 1} {'type': 'loss', 'content': 0.1812383532524109, 'timestamp': '2025-10-01 04:19:08.077975', 'step': 2448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:08.132747', 'step': 2448, 'epoch': 1} {'type': 'loss', 'content': 0.21463526785373688, 'timestamp': '2025-10-01 04:19:08.135575', 'step': 2449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:08.189209', 'step': 2449, 'epoch': 1} {'type': 'loss', 'content': 0.1794195920228958, 'timestamp': '2025-10-01 04:19:08.191198', 'step': 2450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:08.246412', 'step': 2450, 'epoch': 1} {'type': 'loss', 'content': 0.10268393903970718, 'timestamp': '2025-10-01 04:19:08.248799', 'step': 2451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:08.303966', 'step': 2451, 'epoch': 1} {'type': 'loss', 'content': 0.24994134902954102, 'timestamp': '2025-10-01 04:19:08.315399', 'step': 2452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:08.379993', 'step': 2452, 'epoch': 1} {'type': 'loss', 'content': 0.15272371470928192, 'timestamp': '2025-10-01 04:19:08.382131', 'step': 2453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:08.439402', 'step': 2453, 'epoch': 1} {'type': 'loss', 'content': 0.1376856416463852, 'timestamp': '2025-10-01 04:19:08.441933', 'step': 2454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:08.495399', 'step': 2454, 'epoch': 1} {'type': 'loss', 'content': 0.16201159358024597, 'timestamp': '2025-10-01 04:19:08.497310', 'step': 2455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:08.550695', 'step': 2455, 'epoch': 1} {'type': 'loss', 'content': 0.17666460573673248, 'timestamp': '2025-10-01 04:19:08.556951', 'step': 2456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:08.610410', 'step': 2456, 'epoch': 1} {'type': 'loss', 'content': 0.18012087047100067, 'timestamp': '2025-10-01 04:19:08.612822', 'step': 2457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:08.665374', 'step': 2457, 'epoch': 1} {'type': 'loss', 'content': 0.14926792681217194, 'timestamp': '2025-10-01 04:19:08.667768', 'step': 2458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:08.720397', 'step': 2458, 'epoch': 1} {'type': 'loss', 'content': 0.23411059379577637, 'timestamp': '2025-10-01 04:19:08.723081', 'step': 2459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:08.776165', 'step': 2459, 'epoch': 1} {'type': 'loss', 'content': 0.16115783154964447, 'timestamp': '2025-10-01 04:19:08.782506', 'step': 2460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:08.834967', 'step': 2460, 'epoch': 1} {'type': 'loss', 'content': 0.2043960690498352, 'timestamp': '2025-10-01 04:19:08.837131', 'step': 2461, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:08.890639', 'step': 2461, 'epoch': 1} {'type': 'loss', 'content': 0.1694275438785553, 'timestamp': '2025-10-01 04:19:08.892795', 'step': 2462, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:08.946793', 'step': 2462, 'epoch': 1} {'type': 'loss', 'content': 0.24621857702732086, 'timestamp': '2025-10-01 04:19:08.948924', 'step': 2463, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:09.003461', 'step': 2463, 'epoch': 1} {'type': 'loss', 'content': 0.21384653449058533, 'timestamp': '2025-10-01 04:19:09.009972', 'step': 2464, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:19:09.065564', 'step': 2464, 'epoch': 1} {'type': 'loss', 'content': 0.2179233580827713, 'timestamp': '2025-10-01 04:19:09.067769', 'step': 2465, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:09.122551', 'step': 2465, 'epoch': 1} {'type': 'loss', 'content': 0.20340995490550995, 'timestamp': '2025-10-01 04:19:09.124830', 'step': 2466, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:09.180340', 'step': 2466, 'epoch': 1} {'type': 'loss', 'content': 0.13565212488174438, 'timestamp': '2025-10-01 04:19:09.182886', 'step': 2467, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:09.236952', 'step': 2467, 'epoch': 1} {'type': 'loss', 'content': 0.21287518739700317, 'timestamp': '2025-10-01 04:19:09.243730', 'step': 2468, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:09.296946', 'step': 2468, 'epoch': 1} {'type': 'loss', 'content': 0.10993581265211105, 'timestamp': '2025-10-01 04:19:09.300358', 'step': 2469, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:09.361036', 'step': 2469, 'epoch': 1} {'type': 'loss', 'content': 0.2924831211566925, 'timestamp': '2025-10-01 04:19:09.363269', 'step': 2470, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:09.418910', 'step': 2470, 'epoch': 1} {'type': 'loss', 'content': 0.1511460542678833, 'timestamp': '2025-10-01 04:19:09.421175', 'step': 2471, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:09.476488', 'step': 2471, 'epoch': 1} {'type': 'loss', 'content': 0.15564605593681335, 'timestamp': '2025-10-01 04:19:09.482661', 'step': 2472, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:09.536266', 'step': 2472, 'epoch': 1} {'type': 'loss', 'content': 0.18235139548778534, 'timestamp': '2025-10-01 04:19:09.538984', 'step': 2473, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:09.601785', 'step': 2473, 'epoch': 1} {'type': 'loss', 'content': 0.2119128555059433, 'timestamp': '2025-10-01 04:19:09.604365', 'step': 2474, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:19:09.664579', 'step': 2474, 'epoch': 1} {'type': 'loss', 'content': 0.1044657900929451, 'timestamp': '2025-10-01 04:19:09.667233', 'step': 2475, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:09.726380', 'step': 2475, 'epoch': 1} {'type': 'loss', 'content': 0.2008340060710907, 'timestamp': '2025-10-01 04:19:09.733806', 'step': 2476, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:09.789640', 'step': 2476, 'epoch': 1} {'type': 'loss', 'content': 0.26584139466285706, 'timestamp': '2025-10-01 04:19:09.792010', 'step': 2477, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:09.845602', 'step': 2477, 'epoch': 1} {'type': 'loss', 'content': 0.09946200251579285, 'timestamp': '2025-10-01 04:19:09.848058', 'step': 2478, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:09.901530', 'step': 2478, 'epoch': 1} {'type': 'loss', 'content': 0.16610586643218994, 'timestamp': '2025-10-01 04:19:09.903893', 'step': 2479, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:09.959414', 'step': 2479, 'epoch': 1} {'type': 'loss', 'content': 0.12500110268592834, 'timestamp': '2025-10-01 04:19:09.969737', 'step': 2480, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:10.023899', 'step': 2480, 'epoch': 1} {'type': 'loss', 'content': 0.2538769245147705, 'timestamp': '2025-10-01 04:19:10.026305', 'step': 2481, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:19:10.078634', 'step': 2481, 'epoch': 1} {'type': 'loss', 'content': 0.1478910893201828, 'timestamp': '2025-10-01 04:19:10.083925', 'step': 2482, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:10.136891', 'step': 2482, 'epoch': 1} {'type': 'loss', 'content': 0.22807350754737854, 'timestamp': '2025-10-01 04:19:10.139155', 'step': 2483, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:10.191884', 'step': 2483, 'epoch': 1} {'type': 'loss', 'content': 0.18112218379974365, 'timestamp': '2025-10-01 04:19:10.197438', 'step': 2484, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:10.249202', 'step': 2484, 'epoch': 1} {'type': 'loss', 'content': 0.21338306367397308, 'timestamp': '2025-10-01 04:19:10.256664', 'step': 2485, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:10.309286', 'step': 2485, 'epoch': 1} {'type': 'loss', 'content': 0.2532312273979187, 'timestamp': '2025-10-01 04:19:10.311678', 'step': 2486, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:10.364710', 'step': 2486, 'epoch': 1} {'type': 'loss', 'content': 0.12329715490341187, 'timestamp': '2025-10-01 04:19:10.367407', 'step': 2487, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:10.420758', 'step': 2487, 'epoch': 1} {'type': 'loss', 'content': 0.269522488117218, 'timestamp': '2025-10-01 04:19:10.427095', 'step': 2488, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:10.480623', 'step': 2488, 'epoch': 1} {'type': 'loss', 'content': 0.2052430808544159, 'timestamp': '2025-10-01 04:19:10.483456', 'step': 2489, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:10.536317', 'step': 2489, 'epoch': 1} {'type': 'loss', 'content': 0.16353170573711395, 'timestamp': '2025-10-01 04:19:10.538473', 'step': 2490, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:10.590947', 'step': 2490, 'epoch': 1} {'type': 'loss', 'content': 0.20826120674610138, 'timestamp': '2025-10-01 04:19:10.593414', 'step': 2491, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:10.646919', 'step': 2491, 'epoch': 1} {'type': 'loss', 'content': 0.15951402485370636, 'timestamp': '2025-10-01 04:19:10.652399', 'step': 2492, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:10.705459', 'step': 2492, 'epoch': 1} {'type': 'loss', 'content': 0.13451671600341797, 'timestamp': '2025-10-01 04:19:10.707609', 'step': 2493, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:10.760227', 'step': 2493, 'epoch': 1} {'type': 'loss', 'content': 0.305350124835968, 'timestamp': '2025-10-01 04:19:10.762450', 'step': 2494, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:10.815375', 'step': 2494, 'epoch': 1} {'type': 'loss', 'content': 0.2637692391872406, 'timestamp': '2025-10-01 04:19:10.818293', 'step': 2495, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:10.872219', 'step': 2495, 'epoch': 1} {'type': 'loss', 'content': 0.13813769817352295, 'timestamp': '2025-10-01 04:19:10.878134', 'step': 2496, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:10.930563', 'step': 2496, 'epoch': 1} {'type': 'loss', 'content': 0.14094072580337524, 'timestamp': '2025-10-01 04:19:10.932662', 'step': 2497, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:19:10.986009', 'step': 2497, 'epoch': 1} {'type': 'loss', 'content': 0.18784324824810028, 'timestamp': '2025-10-01 04:19:10.988171', 'step': 2498, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:11.041344', 'step': 2498, 'epoch': 1} {'type': 'loss', 'content': 0.12678936123847961, 'timestamp': '2025-10-01 04:19:11.043875', 'step': 2499, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:11.095925', 'step': 2499, 'epoch': 1} {'type': 'loss', 'content': 0.22963179647922516, 'timestamp': '2025-10-01 04:19:11.101686', 'step': 2500, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 2500', 'timestamp': '2025-10-01 04:19:11.470335', 'step': 2500, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:11.522481', 'step': 2500, 'epoch': 1} {'type': 'loss', 'content': 0.18440786004066467, 'timestamp': '2025-10-01 04:19:11.525231', 'step': 2501, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:11.577760', 'step': 2501, 'epoch': 1} {'type': 'loss', 'content': 0.20975331962108612, 'timestamp': '2025-10-01 04:19:11.580348', 'step': 2502, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:11.633325', 'step': 2502, 'epoch': 1} {'type': 'loss', 'content': 0.11171288043260574, 'timestamp': '2025-10-01 04:19:11.636362', 'step': 2503, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:11.690541', 'step': 2503, 'epoch': 1} {'type': 'loss', 'content': 0.19097431004047394, 'timestamp': '2025-10-01 04:19:11.696433', 'step': 2504, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:11.748626', 'step': 2504, 'epoch': 1} {'type': 'loss', 'content': 0.22504922747612, 'timestamp': '2025-10-01 04:19:11.750856', 'step': 2505, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:11.803610', 'step': 2505, 'epoch': 1} {'type': 'loss', 'content': 0.15519903600215912, 'timestamp': '2025-10-01 04:19:11.809583', 'step': 2506, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:11.863349', 'step': 2506, 'epoch': 1} {'type': 'loss', 'content': 0.17322921752929688, 'timestamp': '2025-10-01 04:19:11.865662', 'step': 2507, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:11.918968', 'step': 2507, 'epoch': 1} {'type': 'loss', 'content': 0.26513275504112244, 'timestamp': '2025-10-01 04:19:11.924710', 'step': 2508, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:11.977115', 'step': 2508, 'epoch': 1} {'type': 'loss', 'content': 0.1302076280117035, 'timestamp': '2025-10-01 04:19:11.979207', 'step': 2509, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:12.032070', 'step': 2509, 'epoch': 1} {'type': 'loss', 'content': 0.15753071010112762, 'timestamp': '2025-10-01 04:19:12.034258', 'step': 2510, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:12.087055', 'step': 2510, 'epoch': 1} {'type': 'loss', 'content': 0.18135181069374084, 'timestamp': '2025-10-01 04:19:12.090181', 'step': 2511, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:12.144873', 'step': 2511, 'epoch': 1} {'type': 'loss', 'content': 0.17106395959854126, 'timestamp': '2025-10-01 04:19:12.152256', 'step': 2512, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:12.206328', 'step': 2512, 'epoch': 1} {'type': 'loss', 'content': 0.313428670167923, 'timestamp': '2025-10-01 04:19:12.209192', 'step': 2513, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:12.262799', 'step': 2513, 'epoch': 1} {'type': 'loss', 'content': 0.1744188815355301, 'timestamp': '2025-10-01 04:19:12.265065', 'step': 2514, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:12.318907', 'step': 2514, 'epoch': 1} {'type': 'loss', 'content': 0.18803511559963226, 'timestamp': '2025-10-01 04:19:12.321312', 'step': 2515, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:12.374719', 'step': 2515, 'epoch': 1} {'type': 'loss', 'content': 0.20336300134658813, 'timestamp': '2025-10-01 04:19:12.380692', 'step': 2516, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:12.436666', 'step': 2516, 'epoch': 1} {'type': 'loss', 'content': 0.2126576155424118, 'timestamp': '2025-10-01 04:19:12.439061', 'step': 2517, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:12.491025', 'step': 2517, 'epoch': 1} {'type': 'loss', 'content': 0.10883620381355286, 'timestamp': '2025-10-01 04:19:12.493079', 'step': 2518, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:12.545241', 'step': 2518, 'epoch': 1} {'type': 'loss', 'content': 0.15872877836227417, 'timestamp': '2025-10-01 04:19:12.547715', 'step': 2519, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:12.600602', 'step': 2519, 'epoch': 1} {'type': 'loss', 'content': 0.15364696085453033, 'timestamp': '2025-10-01 04:19:12.606360', 'step': 2520, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:12.658178', 'step': 2520, 'epoch': 1} {'type': 'loss', 'content': 0.29097726941108704, 'timestamp': '2025-10-01 04:19:12.662963', 'step': 2521, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:12.717025', 'step': 2521, 'epoch': 1} {'type': 'loss', 'content': 0.09662393480539322, 'timestamp': '2025-10-01 04:19:12.719393', 'step': 2522, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:12.774897', 'step': 2522, 'epoch': 1} {'type': 'loss', 'content': 0.1314069628715515, 'timestamp': '2025-10-01 04:19:12.777090', 'step': 2523, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:12.833835', 'step': 2523, 'epoch': 1} {'type': 'loss', 'content': 0.1685096025466919, 'timestamp': '2025-10-01 04:19:12.840146', 'step': 2524, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:12.894017', 'step': 2524, 'epoch': 1} {'type': 'loss', 'content': 0.2604244351387024, 'timestamp': '2025-10-01 04:19:12.896815', 'step': 2525, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:12.954083', 'step': 2525, 'epoch': 1} {'type': 'loss', 'content': 0.129282146692276, 'timestamp': '2025-10-01 04:19:12.956990', 'step': 2526, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:13.010130', 'step': 2526, 'epoch': 1} {'type': 'loss', 'content': 0.2615315020084381, 'timestamp': '2025-10-01 04:19:13.012734', 'step': 2527, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:13.073254', 'step': 2527, 'epoch': 1} {'type': 'loss', 'content': 0.25415825843811035, 'timestamp': '2025-10-01 04:19:13.079477', 'step': 2528, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:13.131704', 'step': 2528, 'epoch': 1} {'type': 'loss', 'content': 0.18654990196228027, 'timestamp': '2025-10-01 04:19:13.133869', 'step': 2529, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:13.186948', 'step': 2529, 'epoch': 1} {'type': 'loss', 'content': 0.21181218326091766, 'timestamp': '2025-10-01 04:19:13.189457', 'step': 2530, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:13.246949', 'step': 2530, 'epoch': 1} {'type': 'loss', 'content': 0.20538966357707977, 'timestamp': '2025-10-01 04:19:13.249623', 'step': 2531, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:13.303330', 'step': 2531, 'epoch': 1} {'type': 'loss', 'content': 0.17763829231262207, 'timestamp': '2025-10-01 04:19:13.309469', 'step': 2532, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:13.362172', 'step': 2532, 'epoch': 1} {'type': 'loss', 'content': 0.16743870079517365, 'timestamp': '2025-10-01 04:19:13.364307', 'step': 2533, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:13.417533', 'step': 2533, 'epoch': 1} {'type': 'loss', 'content': 0.14553366601467133, 'timestamp': '2025-10-01 04:19:13.419849', 'step': 2534, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:13.475930', 'step': 2534, 'epoch': 1} {'type': 'loss', 'content': 0.16780373454093933, 'timestamp': '2025-10-01 04:19:13.478438', 'step': 2535, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:13.533341', 'step': 2535, 'epoch': 1} {'type': 'loss', 'content': 0.2181483656167984, 'timestamp': '2025-10-01 04:19:13.540065', 'step': 2536, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:13.592971', 'step': 2536, 'epoch': 1} {'type': 'loss', 'content': 0.0934322327375412, 'timestamp': '2025-10-01 04:19:13.595521', 'step': 2537, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:13.650520', 'step': 2537, 'epoch': 1} {'type': 'loss', 'content': 0.11647802591323853, 'timestamp': '2025-10-01 04:19:13.653936', 'step': 2538, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:13.709224', 'step': 2538, 'epoch': 1} {'type': 'loss', 'content': 0.20680847764015198, 'timestamp': '2025-10-01 04:19:13.711750', 'step': 2539, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:13.766040', 'step': 2539, 'epoch': 1} {'type': 'loss', 'content': 0.19685925543308258, 'timestamp': '2025-10-01 04:19:13.772872', 'step': 2540, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:13.828169', 'step': 2540, 'epoch': 1} {'type': 'loss', 'content': 0.21067838370800018, 'timestamp': '2025-10-01 04:19:13.831461', 'step': 2541, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:13.885625', 'step': 2541, 'epoch': 1} {'type': 'loss', 'content': 0.12594495713710785, 'timestamp': '2025-10-01 04:19:13.888422', 'step': 2542, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:13.942646', 'step': 2542, 'epoch': 1} {'type': 'loss', 'content': 0.11287007480859756, 'timestamp': '2025-10-01 04:19:13.944985', 'step': 2543, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:13.999852', 'step': 2543, 'epoch': 1} {'type': 'loss', 'content': 0.21623164415359497, 'timestamp': '2025-10-01 04:19:14.006392', 'step': 2544, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:14.061011', 'step': 2544, 'epoch': 1} {'type': 'loss', 'content': 0.16425009071826935, 'timestamp': '2025-10-01 04:19:14.063393', 'step': 2545, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:14.115984', 'step': 2545, 'epoch': 1} {'type': 'loss', 'content': 0.1683691293001175, 'timestamp': '2025-10-01 04:19:14.118299', 'step': 2546, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:14.171390', 'step': 2546, 'epoch': 1} {'type': 'loss', 'content': 0.20642632246017456, 'timestamp': '2025-10-01 04:19:14.173540', 'step': 2547, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:14.226345', 'step': 2547, 'epoch': 1} {'type': 'loss', 'content': 0.2564546763896942, 'timestamp': '2025-10-01 04:19:14.232606', 'step': 2548, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:14.284810', 'step': 2548, 'epoch': 1} {'type': 'loss', 'content': 0.22687600553035736, 'timestamp': '2025-10-01 04:19:14.286921', 'step': 2549, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:14.343213', 'step': 2549, 'epoch': 1} {'type': 'loss', 'content': 0.13272212445735931, 'timestamp': '2025-10-01 04:19:14.345300', 'step': 2550, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:14.412977', 'step': 2550, 'epoch': 1} {'type': 'loss', 'content': 0.22048860788345337, 'timestamp': '2025-10-01 04:19:14.415219', 'step': 2551, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:14.472952', 'step': 2551, 'epoch': 1} {'type': 'loss', 'content': 0.11392544955015182, 'timestamp': '2025-10-01 04:19:14.478957', 'step': 2552, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:14.531233', 'step': 2552, 'epoch': 1} {'type': 'loss', 'content': 0.24093817174434662, 'timestamp': '2025-10-01 04:19:14.533417', 'step': 2553, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:19:14.585870', 'step': 2553, 'epoch': 1} {'type': 'loss', 'content': 0.16967050731182098, 'timestamp': '2025-10-01 04:19:14.588208', 'step': 2554, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:14.641454', 'step': 2554, 'epoch': 1} {'type': 'loss', 'content': 0.21581493318080902, 'timestamp': '2025-10-01 04:19:14.644116', 'step': 2555, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:14.697884', 'step': 2555, 'epoch': 1} {'type': 'loss', 'content': 0.16714008152484894, 'timestamp': '2025-10-01 04:19:14.706779', 'step': 2556, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:14.758922', 'step': 2556, 'epoch': 1} {'type': 'loss', 'content': 0.1378345787525177, 'timestamp': '2025-10-01 04:19:14.761138', 'step': 2557, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:19:14.813242', 'step': 2557, 'epoch': 1} {'type': 'loss', 'content': 0.14145274460315704, 'timestamp': '2025-10-01 04:19:14.815470', 'step': 2558, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:14.869095', 'step': 2558, 'epoch': 1} {'type': 'loss', 'content': 0.1040562316775322, 'timestamp': '2025-10-01 04:19:14.871596', 'step': 2559, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:14.927920', 'step': 2559, 'epoch': 1} {'type': 'loss', 'content': 0.2004980593919754, 'timestamp': '2025-10-01 04:19:14.934963', 'step': 2560, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:14.988010', 'step': 2560, 'epoch': 1} {'type': 'loss', 'content': 0.14063158631324768, 'timestamp': '2025-10-01 04:19:14.990228', 'step': 2561, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:19:15.044958', 'step': 2561, 'epoch': 1} {'type': 'loss', 'content': 0.10274840891361237, 'timestamp': '2025-10-01 04:19:15.047169', 'step': 2562, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:15.100943', 'step': 2562, 'epoch': 1} {'type': 'loss', 'content': 0.088927261531353, 'timestamp': '2025-10-01 04:19:15.103102', 'step': 2563, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:19:15.156052', 'step': 2563, 'epoch': 1} {'type': 'loss', 'content': 0.10644527524709702, 'timestamp': '2025-10-01 04:19:15.162096', 'step': 2564, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:15.214362', 'step': 2564, 'epoch': 1} {'type': 'loss', 'content': 0.17160364985466003, 'timestamp': '2025-10-01 04:19:15.216624', 'step': 2565, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:15.270101', 'step': 2565, 'epoch': 1} {'type': 'loss', 'content': 0.1601518839597702, 'timestamp': '2025-10-01 04:19:15.279007', 'step': 2566, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:15.333320', 'step': 2566, 'epoch': 1} {'type': 'loss', 'content': 0.15589211881160736, 'timestamp': '2025-10-01 04:19:15.335389', 'step': 2567, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:15.388764', 'step': 2567, 'epoch': 1} {'type': 'loss', 'content': 0.15985159575939178, 'timestamp': '2025-10-01 04:19:15.394718', 'step': 2568, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:15.452760', 'step': 2568, 'epoch': 1} {'type': 'loss', 'content': 0.13654924929141998, 'timestamp': '2025-10-01 04:19:15.456654', 'step': 2569, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:15.517370', 'step': 2569, 'epoch': 1} {'type': 'loss', 'content': 0.17436273396015167, 'timestamp': '2025-10-01 04:19:15.519524', 'step': 2570, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:15.572502', 'step': 2570, 'epoch': 1} {'type': 'loss', 'content': 0.22965212166309357, 'timestamp': '2025-10-01 04:19:15.575766', 'step': 2571, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:15.640359', 'step': 2571, 'epoch': 1} {'type': 'loss', 'content': 0.12903375923633575, 'timestamp': '2025-10-01 04:19:15.653114', 'step': 2572, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:15.705117', 'step': 2572, 'epoch': 1} {'type': 'loss', 'content': 0.11433904618024826, 'timestamp': '2025-10-01 04:19:15.707437', 'step': 2573, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:15.760911', 'step': 2573, 'epoch': 1} {'type': 'loss', 'content': 0.14274635910987854, 'timestamp': '2025-10-01 04:19:15.765032', 'step': 2574, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:19:15.819826', 'step': 2574, 'epoch': 1} {'type': 'loss', 'content': 0.1618310511112213, 'timestamp': '2025-10-01 04:19:15.822656', 'step': 2575, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:15.886644', 'step': 2575, 'epoch': 1} {'type': 'loss', 'content': 0.08830918371677399, 'timestamp': '2025-10-01 04:19:15.892495', 'step': 2576, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:15.947271', 'step': 2576, 'epoch': 1} {'type': 'loss', 'content': 0.11091522127389908, 'timestamp': '2025-10-01 04:19:15.950181', 'step': 2577, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:19:16.006948', 'step': 2577, 'epoch': 1} {'type': 'loss', 'content': 0.2658928334712982, 'timestamp': '2025-10-01 04:19:16.011779', 'step': 2578, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:16.072151', 'step': 2578, 'epoch': 1} {'type': 'loss', 'content': 0.20485085248947144, 'timestamp': '2025-10-01 04:19:16.075211', 'step': 2579, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:16.130104', 'step': 2579, 'epoch': 1} {'type': 'loss', 'content': 0.10171519219875336, 'timestamp': '2025-10-01 04:19:16.135704', 'step': 2580, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:16.190289', 'step': 2580, 'epoch': 1} {'type': 'loss', 'content': 0.17809714376926422, 'timestamp': '2025-10-01 04:19:16.192725', 'step': 2581, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:16.248442', 'step': 2581, 'epoch': 1} {'type': 'loss', 'content': 0.20258449018001556, 'timestamp': '2025-10-01 04:19:16.255776', 'step': 2582, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:16.308532', 'step': 2582, 'epoch': 1} {'type': 'loss', 'content': 0.1588495969772339, 'timestamp': '2025-10-01 04:19:16.310897', 'step': 2583, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:16.364036', 'step': 2583, 'epoch': 1} {'type': 'loss', 'content': 0.19466206431388855, 'timestamp': '2025-10-01 04:19:16.369953', 'step': 2584, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:16.425802', 'step': 2584, 'epoch': 1} {'type': 'loss', 'content': 0.1597033441066742, 'timestamp': '2025-10-01 04:19:16.431647', 'step': 2585, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:16.496619', 'step': 2585, 'epoch': 1} {'type': 'loss', 'content': 0.14802320301532745, 'timestamp': '2025-10-01 04:19:16.499142', 'step': 2586, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:16.552394', 'step': 2586, 'epoch': 1} {'type': 'loss', 'content': 0.19946540892124176, 'timestamp': '2025-10-01 04:19:16.554818', 'step': 2587, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:16.608087', 'step': 2587, 'epoch': 1} {'type': 'loss', 'content': 0.18258245289325714, 'timestamp': '2025-10-01 04:19:16.614152', 'step': 2588, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:16.668455', 'step': 2588, 'epoch': 1} {'type': 'loss', 'content': 0.1861528605222702, 'timestamp': '2025-10-01 04:19:16.670913', 'step': 2589, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:16.723492', 'step': 2589, 'epoch': 1} {'type': 'loss', 'content': 0.10838267207145691, 'timestamp': '2025-10-01 04:19:16.725570', 'step': 2590, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:16.778148', 'step': 2590, 'epoch': 1} {'type': 'loss', 'content': 0.2031259983778, 'timestamp': '2025-10-01 04:19:16.780274', 'step': 2591, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:16.832575', 'step': 2591, 'epoch': 1} {'type': 'loss', 'content': 0.17939598858356476, 'timestamp': '2025-10-01 04:19:16.838284', 'step': 2592, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:16.890264', 'step': 2592, 'epoch': 1} {'type': 'loss', 'content': 0.16699393093585968, 'timestamp': '2025-10-01 04:19:16.892359', 'step': 2593, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:16.956084', 'step': 2593, 'epoch': 1} {'type': 'loss', 'content': 0.16999438405036926, 'timestamp': '2025-10-01 04:19:16.958548', 'step': 2594, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:17.011619', 'step': 2594, 'epoch': 1} {'type': 'loss', 'content': 0.06633853167295456, 'timestamp': '2025-10-01 04:19:17.013851', 'step': 2595, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:17.066703', 'step': 2595, 'epoch': 1} {'type': 'loss', 'content': 0.3100574016571045, 'timestamp': '2025-10-01 04:19:17.072453', 'step': 2596, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:17.124395', 'step': 2596, 'epoch': 1} {'type': 'loss', 'content': 0.09282547235488892, 'timestamp': '2025-10-01 04:19:17.126831', 'step': 2597, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:17.179080', 'step': 2597, 'epoch': 1} {'type': 'loss', 'content': 0.10978280752897263, 'timestamp': '2025-10-01 04:19:17.181005', 'step': 2598, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:17.233569', 'step': 2598, 'epoch': 1} {'type': 'loss', 'content': 0.15795204043388367, 'timestamp': '2025-10-01 04:19:17.235711', 'step': 2599, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:17.287683', 'step': 2599, 'epoch': 1} {'type': 'loss', 'content': 0.21762700378894806, 'timestamp': '2025-10-01 04:19:17.293559', 'step': 2600, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:17.345970', 'step': 2600, 'epoch': 1} {'type': 'loss', 'content': 0.13586392998695374, 'timestamp': '2025-10-01 04:19:17.348232', 'step': 2601, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:17.400819', 'step': 2601, 'epoch': 1} {'type': 'loss', 'content': 0.20113573968410492, 'timestamp': '2025-10-01 04:19:17.403051', 'step': 2602, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:17.457453', 'step': 2602, 'epoch': 1} {'type': 'loss', 'content': 0.16426603496074677, 'timestamp': '2025-10-01 04:19:17.459729', 'step': 2603, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:17.511954', 'step': 2603, 'epoch': 1} {'type': 'loss', 'content': 0.15022411942481995, 'timestamp': '2025-10-01 04:19:17.517791', 'step': 2604, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:19:17.570183', 'step': 2604, 'epoch': 1} {'type': 'loss', 'content': 0.16833187639713287, 'timestamp': '2025-10-01 04:19:17.572461', 'step': 2605, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:17.625467', 'step': 2605, 'epoch': 1} {'type': 'loss', 'content': 0.2372989058494568, 'timestamp': '2025-10-01 04:19:17.627676', 'step': 2606, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:17.681663', 'step': 2606, 'epoch': 1} {'type': 'loss', 'content': 0.22809095680713654, 'timestamp': '2025-10-01 04:19:17.683969', 'step': 2607, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:17.736774', 'step': 2607, 'epoch': 1} {'type': 'loss', 'content': 0.10694339871406555, 'timestamp': '2025-10-01 04:19:17.742477', 'step': 2608, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:17.794441', 'step': 2608, 'epoch': 1} {'type': 'loss', 'content': 0.22494757175445557, 'timestamp': '2025-10-01 04:19:17.798136', 'step': 2609, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:17.853123', 'step': 2609, 'epoch': 1} {'type': 'loss', 'content': 0.19899974763393402, 'timestamp': '2025-10-01 04:19:17.855523', 'step': 2610, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:17.908616', 'step': 2610, 'epoch': 1} {'type': 'loss', 'content': 0.13313014805316925, 'timestamp': '2025-10-01 04:19:17.910941', 'step': 2611, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:17.963377', 'step': 2611, 'epoch': 1} {'type': 'loss', 'content': 0.14880549907684326, 'timestamp': '2025-10-01 04:19:17.969591', 'step': 2612, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:18.022081', 'step': 2612, 'epoch': 1} {'type': 'loss', 'content': 0.1741720587015152, 'timestamp': '2025-10-01 04:19:18.024381', 'step': 2613, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:18.078659', 'step': 2613, 'epoch': 1} {'type': 'loss', 'content': 0.1160992830991745, 'timestamp': '2025-10-01 04:19:18.080812', 'step': 2614, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:18.135804', 'step': 2614, 'epoch': 1} {'type': 'loss', 'content': 0.13113188743591309, 'timestamp': '2025-10-01 04:19:18.138087', 'step': 2615, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:18.190728', 'step': 2615, 'epoch': 1} {'type': 'loss', 'content': 0.2849941849708557, 'timestamp': '2025-10-01 04:19:18.196579', 'step': 2616, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:18.251227', 'step': 2616, 'epoch': 1} {'type': 'loss', 'content': 0.29756829142570496, 'timestamp': '2025-10-01 04:19:18.253853', 'step': 2617, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:18.306769', 'step': 2617, 'epoch': 1} {'type': 'loss', 'content': 0.15203507244586945, 'timestamp': '2025-10-01 04:19:18.314720', 'step': 2618, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:18.368047', 'step': 2618, 'epoch': 1} {'type': 'loss', 'content': 0.17729997634887695, 'timestamp': '2025-10-01 04:19:18.370286', 'step': 2619, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:19:18.423921', 'step': 2619, 'epoch': 1} {'type': 'loss', 'content': 0.21159258484840393, 'timestamp': '2025-10-01 04:19:18.430584', 'step': 2620, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:18.483195', 'step': 2620, 'epoch': 1} {'type': 'loss', 'content': 0.1299048662185669, 'timestamp': '2025-10-01 04:19:18.485531', 'step': 2621, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:18.537694', 'step': 2621, 'epoch': 1} {'type': 'loss', 'content': 0.11480546742677689, 'timestamp': '2025-10-01 04:19:18.540029', 'step': 2622, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:18.597485', 'step': 2622, 'epoch': 1} {'type': 'loss', 'content': 0.14014948904514313, 'timestamp': '2025-10-01 04:19:18.599617', 'step': 2623, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:18.653543', 'step': 2623, 'epoch': 1} {'type': 'loss', 'content': 0.24401021003723145, 'timestamp': '2025-10-01 04:19:18.659153', 'step': 2624, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:18.711588', 'step': 2624, 'epoch': 1} {'type': 'loss', 'content': 0.11802826076745987, 'timestamp': '2025-10-01 04:19:18.714006', 'step': 2625, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:18.766902', 'step': 2625, 'epoch': 1} {'type': 'loss', 'content': 0.12697246670722961, 'timestamp': '2025-10-01 04:19:18.769242', 'step': 2626, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:18.824417', 'step': 2626, 'epoch': 1} {'type': 'loss', 'content': 0.1869639903306961, 'timestamp': '2025-10-01 04:19:18.832176', 'step': 2627, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:18.885144', 'step': 2627, 'epoch': 1} {'type': 'loss', 'content': 0.16047793626785278, 'timestamp': '2025-10-01 04:19:18.905070', 'step': 2628, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:19:18.964476', 'step': 2628, 'epoch': 1} {'type': 'loss', 'content': 0.19212350249290466, 'timestamp': '2025-10-01 04:19:18.966773', 'step': 2629, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:19.019701', 'step': 2629, 'epoch': 1} {'type': 'loss', 'content': 0.09378553181886673, 'timestamp': '2025-10-01 04:19:19.022584', 'step': 2630, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:19.077656', 'step': 2630, 'epoch': 1} {'type': 'loss', 'content': 0.21258334815502167, 'timestamp': '2025-10-01 04:19:19.079825', 'step': 2631, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:19.132777', 'step': 2631, 'epoch': 1} {'type': 'loss', 'content': 0.15584567189216614, 'timestamp': '2025-10-01 04:19:19.138559', 'step': 2632, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:19.190643', 'step': 2632, 'epoch': 1} {'type': 'loss', 'content': 0.2630845606327057, 'timestamp': '2025-10-01 04:19:19.193058', 'step': 2633, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:19.247168', 'step': 2633, 'epoch': 1} {'type': 'loss', 'content': 0.16258397698402405, 'timestamp': '2025-10-01 04:19:19.260854', 'step': 2634, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:19.315043', 'step': 2634, 'epoch': 1} {'type': 'loss', 'content': 0.19129729270935059, 'timestamp': '2025-10-01 04:19:19.317664', 'step': 2635, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:19.390659', 'step': 2635, 'epoch': 1} {'type': 'loss', 'content': 0.20615725219249725, 'timestamp': '2025-10-01 04:19:19.396569', 'step': 2636, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:19.450710', 'step': 2636, 'epoch': 1} {'type': 'loss', 'content': 0.13437865674495697, 'timestamp': '2025-10-01 04:19:19.453757', 'step': 2637, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:19.506873', 'step': 2637, 'epoch': 1} {'type': 'loss', 'content': 0.14984774589538574, 'timestamp': '2025-10-01 04:19:19.508990', 'step': 2638, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:19.569003', 'step': 2638, 'epoch': 1} {'type': 'loss', 'content': 0.1584126204252243, 'timestamp': '2025-10-01 04:19:19.571074', 'step': 2639, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:19.625730', 'step': 2639, 'epoch': 1} {'type': 'loss', 'content': 0.11506485193967819, 'timestamp': '2025-10-01 04:19:19.631494', 'step': 2640, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:19.684112', 'step': 2640, 'epoch': 1} {'type': 'loss', 'content': 0.27529823780059814, 'timestamp': '2025-10-01 04:19:19.686272', 'step': 2641, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:19.738986', 'step': 2641, 'epoch': 1} {'type': 'loss', 'content': 0.20151066780090332, 'timestamp': '2025-10-01 04:19:19.741557', 'step': 2642, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:19.794234', 'step': 2642, 'epoch': 1} {'type': 'loss', 'content': 0.1357196867465973, 'timestamp': '2025-10-01 04:19:19.797751', 'step': 2643, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:19.850487', 'step': 2643, 'epoch': 1} {'type': 'loss', 'content': 0.0948316901922226, 'timestamp': '2025-10-01 04:19:19.856937', 'step': 2644, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:19.913428', 'step': 2644, 'epoch': 1} {'type': 'loss', 'content': 0.13880562782287598, 'timestamp': '2025-10-01 04:19:19.915593', 'step': 2645, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:19.968237', 'step': 2645, 'epoch': 1} {'type': 'loss', 'content': 0.1371232271194458, 'timestamp': '2025-10-01 04:19:19.970620', 'step': 2646, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:20.023640', 'step': 2646, 'epoch': 1} {'type': 'loss', 'content': 0.136235773563385, 'timestamp': '2025-10-01 04:19:20.025933', 'step': 2647, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:20.078494', 'step': 2647, 'epoch': 1} {'type': 'loss', 'content': 0.1534428596496582, 'timestamp': '2025-10-01 04:19:20.084187', 'step': 2648, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:20.136437', 'step': 2648, 'epoch': 1} {'type': 'loss', 'content': 0.12193261086940765, 'timestamp': '2025-10-01 04:19:20.138474', 'step': 2649, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:20.192054', 'step': 2649, 'epoch': 1} {'type': 'loss', 'content': 0.22731053829193115, 'timestamp': '2025-10-01 04:19:20.194301', 'step': 2650, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:20.246835', 'step': 2650, 'epoch': 1} {'type': 'loss', 'content': 0.23490047454833984, 'timestamp': '2025-10-01 04:19:20.249101', 'step': 2651, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:20.301438', 'step': 2651, 'epoch': 1} {'type': 'loss', 'content': 0.21341896057128906, 'timestamp': '2025-10-01 04:19:20.307111', 'step': 2652, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:20.360567', 'step': 2652, 'epoch': 1} {'type': 'loss', 'content': 0.1389484852552414, 'timestamp': '2025-10-01 04:19:20.362838', 'step': 2653, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:20.415871', 'step': 2653, 'epoch': 1} {'type': 'loss', 'content': 0.09328489005565643, 'timestamp': '2025-10-01 04:19:20.418260', 'step': 2654, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:20.475823', 'step': 2654, 'epoch': 1} {'type': 'loss', 'content': 0.26828524470329285, 'timestamp': '2025-10-01 04:19:20.477968', 'step': 2655, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:20.530680', 'step': 2655, 'epoch': 1} {'type': 'loss', 'content': 0.15473783016204834, 'timestamp': '2025-10-01 04:19:20.536779', 'step': 2656, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:19:20.597503', 'step': 2656, 'epoch': 1} {'type': 'loss', 'content': 0.1193695068359375, 'timestamp': '2025-10-01 04:19:20.600182', 'step': 2657, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:20.652774', 'step': 2657, 'epoch': 1} {'type': 'loss', 'content': 0.14606231451034546, 'timestamp': '2025-10-01 04:19:20.655552', 'step': 2658, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:20.708535', 'step': 2658, 'epoch': 1} {'type': 'loss', 'content': 0.1758207231760025, 'timestamp': '2025-10-01 04:19:20.711339', 'step': 2659, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:20.764863', 'step': 2659, 'epoch': 1} {'type': 'loss', 'content': 0.19192534685134888, 'timestamp': '2025-10-01 04:19:20.770762', 'step': 2660, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:20.823050', 'step': 2660, 'epoch': 1} {'type': 'loss', 'content': 0.2819913625717163, 'timestamp': '2025-10-01 04:19:20.826002', 'step': 2661, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:20.880123', 'step': 2661, 'epoch': 1} {'type': 'loss', 'content': 0.18755699694156647, 'timestamp': '2025-10-01 04:19:20.883492', 'step': 2662, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:20.937033', 'step': 2662, 'epoch': 1} {'type': 'loss', 'content': 0.1419629603624344, 'timestamp': '2025-10-01 04:19:20.939656', 'step': 2663, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:20.994531', 'step': 2663, 'epoch': 1} {'type': 'loss', 'content': 0.1589304655790329, 'timestamp': '2025-10-01 04:19:21.000714', 'step': 2664, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:21.053881', 'step': 2664, 'epoch': 1} {'type': 'loss', 'content': 0.19525526463985443, 'timestamp': '2025-10-01 04:19:21.056557', 'step': 2665, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:21.110362', 'step': 2665, 'epoch': 1} {'type': 'loss', 'content': 0.12557119131088257, 'timestamp': '2025-10-01 04:19:21.112807', 'step': 2666, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:21.166777', 'step': 2666, 'epoch': 1} {'type': 'loss', 'content': 0.19138143956661224, 'timestamp': '2025-10-01 04:19:21.169110', 'step': 2667, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:21.226215', 'step': 2667, 'epoch': 1} {'type': 'loss', 'content': 0.19882246851921082, 'timestamp': '2025-10-01 04:19:21.232458', 'step': 2668, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:21.285235', 'step': 2668, 'epoch': 1} {'type': 'loss', 'content': 0.22246785461902618, 'timestamp': '2025-10-01 04:19:21.287477', 'step': 2669, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:21.344110', 'step': 2669, 'epoch': 1} {'type': 'loss', 'content': 0.12661272287368774, 'timestamp': '2025-10-01 04:19:21.346357', 'step': 2670, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:21.400077', 'step': 2670, 'epoch': 1} {'type': 'loss', 'content': 0.19183818995952606, 'timestamp': '2025-10-01 04:19:21.402174', 'step': 2671, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:21.455297', 'step': 2671, 'epoch': 1} {'type': 'loss', 'content': 0.12629812955856323, 'timestamp': '2025-10-01 04:19:21.461449', 'step': 2672, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:21.514302', 'step': 2672, 'epoch': 1} {'type': 'loss', 'content': 0.148997962474823, 'timestamp': '2025-10-01 04:19:21.522709', 'step': 2673, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:21.575851', 'step': 2673, 'epoch': 1} {'type': 'loss', 'content': 0.12340133637189865, 'timestamp': '2025-10-01 04:19:21.578670', 'step': 2674, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:21.632393', 'step': 2674, 'epoch': 1} {'type': 'loss', 'content': 0.19157782196998596, 'timestamp': '2025-10-01 04:19:21.635085', 'step': 2675, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:19:21.688602', 'step': 2675, 'epoch': 1} {'type': 'loss', 'content': 0.11923551559448242, 'timestamp': '2025-10-01 04:19:21.697548', 'step': 2676, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:21.750397', 'step': 2676, 'epoch': 1} {'type': 'loss', 'content': 0.15670304000377655, 'timestamp': '2025-10-01 04:19:21.752977', 'step': 2677, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:21.806397', 'step': 2677, 'epoch': 1} {'type': 'loss', 'content': 0.11187796294689178, 'timestamp': '2025-10-01 04:19:21.808947', 'step': 2678, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:21.862289', 'step': 2678, 'epoch': 1} {'type': 'loss', 'content': 0.18522275984287262, 'timestamp': '2025-10-01 04:19:21.864780', 'step': 2679, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:21.918052', 'step': 2679, 'epoch': 1} {'type': 'loss', 'content': 0.18452653288841248, 'timestamp': '2025-10-01 04:19:21.924290', 'step': 2680, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:21.978730', 'step': 2680, 'epoch': 1} {'type': 'loss', 'content': 0.10158707946538925, 'timestamp': '2025-10-01 04:19:21.981129', 'step': 2681, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:22.036518', 'step': 2681, 'epoch': 1} {'type': 'loss', 'content': 0.1852385252714157, 'timestamp': '2025-10-01 04:19:22.038919', 'step': 2682, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:22.092416', 'step': 2682, 'epoch': 1} {'type': 'loss', 'content': 0.17702360451221466, 'timestamp': '2025-10-01 04:19:22.094541', 'step': 2683, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:19:22.147993', 'step': 2683, 'epoch': 1} {'type': 'loss', 'content': 0.2029503583908081, 'timestamp': '2025-10-01 04:19:22.154333', 'step': 2684, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:22.207791', 'step': 2684, 'epoch': 1} {'type': 'loss', 'content': 0.13105948269367218, 'timestamp': '2025-10-01 04:19:22.210559', 'step': 2685, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:22.265475', 'step': 2685, 'epoch': 1} {'type': 'loss', 'content': 0.1780354231595993, 'timestamp': '2025-10-01 04:19:22.267601', 'step': 2686, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:22.328669', 'step': 2686, 'epoch': 1} {'type': 'loss', 'content': 0.15000368654727936, 'timestamp': '2025-10-01 04:19:22.337269', 'step': 2687, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:22.400777', 'step': 2687, 'epoch': 1} {'type': 'loss', 'content': 0.12317181378602982, 'timestamp': '2025-10-01 04:19:22.407302', 'step': 2688, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:22.461884', 'step': 2688, 'epoch': 1} {'type': 'loss', 'content': 0.1391843557357788, 'timestamp': '2025-10-01 04:19:22.464975', 'step': 2689, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:22.517760', 'step': 2689, 'epoch': 1} {'type': 'loss', 'content': 0.17331239581108093, 'timestamp': '2025-10-01 04:19:22.520193', 'step': 2690, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:22.573429', 'step': 2690, 'epoch': 1} {'type': 'loss', 'content': 0.19820669293403625, 'timestamp': '2025-10-01 04:19:22.575978', 'step': 2691, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:22.629559', 'step': 2691, 'epoch': 1} {'type': 'loss', 'content': 0.22125306725502014, 'timestamp': '2025-10-01 04:19:22.635249', 'step': 2692, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:22.687973', 'step': 2692, 'epoch': 1} {'type': 'loss', 'content': 0.2194632738828659, 'timestamp': '2025-10-01 04:19:22.690824', 'step': 2693, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:22.744146', 'step': 2693, 'epoch': 1} {'type': 'loss', 'content': 0.16591687500476837, 'timestamp': '2025-10-01 04:19:22.746329', 'step': 2694, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:22.798854', 'step': 2694, 'epoch': 1} {'type': 'loss', 'content': 0.14829471707344055, 'timestamp': '2025-10-01 04:19:22.800985', 'step': 2695, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:22.853766', 'step': 2695, 'epoch': 1} {'type': 'loss', 'content': 0.1351117491722107, 'timestamp': '2025-10-01 04:19:22.859420', 'step': 2696, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:22.911412', 'step': 2696, 'epoch': 1} {'type': 'loss', 'content': 0.12410198152065277, 'timestamp': '2025-10-01 04:19:22.913819', 'step': 2697, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:22.966190', 'step': 2697, 'epoch': 1} {'type': 'loss', 'content': 0.17155857384204865, 'timestamp': '2025-10-01 04:19:22.968710', 'step': 2698, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:19:23.021934', 'step': 2698, 'epoch': 1} {'type': 'loss', 'content': 0.20205342769622803, 'timestamp': '2025-10-01 04:19:23.024343', 'step': 2699, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:23.079275', 'step': 2699, 'epoch': 1} {'type': 'loss', 'content': 0.19930976629257202, 'timestamp': '2025-10-01 04:19:23.091992', 'step': 2700, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:23.149104', 'step': 2700, 'epoch': 1} {'type': 'loss', 'content': 0.11768245697021484, 'timestamp': '2025-10-01 04:19:23.151212', 'step': 2701, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:23.203674', 'step': 2701, 'epoch': 1} {'type': 'loss', 'content': 0.18562637269496918, 'timestamp': '2025-10-01 04:19:23.205817', 'step': 2702, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:23.263085', 'step': 2702, 'epoch': 1} {'type': 'loss', 'content': 0.272148996591568, 'timestamp': '2025-10-01 04:19:23.273335', 'step': 2703, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:23.327949', 'step': 2703, 'epoch': 1} {'type': 'loss', 'content': 0.16169264912605286, 'timestamp': '2025-10-01 04:19:23.334285', 'step': 2704, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:23.387408', 'step': 2704, 'epoch': 1} {'type': 'loss', 'content': 0.20672902464866638, 'timestamp': '2025-10-01 04:19:23.390781', 'step': 2705, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:23.444450', 'step': 2705, 'epoch': 1} {'type': 'loss', 'content': 0.16381530463695526, 'timestamp': '2025-10-01 04:19:23.448645', 'step': 2706, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:23.504238', 'step': 2706, 'epoch': 1} {'type': 'loss', 'content': 0.2543753683567047, 'timestamp': '2025-10-01 04:19:23.506452', 'step': 2707, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:19:23.558893', 'step': 2707, 'epoch': 1} {'type': 'loss', 'content': 0.20115183293819427, 'timestamp': '2025-10-01 04:19:23.564466', 'step': 2708, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:23.642686', 'step': 2708, 'epoch': 1} {'type': 'loss', 'content': 0.21393956243991852, 'timestamp': '2025-10-01 04:19:23.652062', 'step': 2709, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:23.704351', 'step': 2709, 'epoch': 1} {'type': 'loss', 'content': 0.23499780893325806, 'timestamp': '2025-10-01 04:19:23.706598', 'step': 2710, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:23.759315', 'step': 2710, 'epoch': 1} {'type': 'loss', 'content': 0.1710829734802246, 'timestamp': '2025-10-01 04:19:23.761491', 'step': 2711, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:23.813970', 'step': 2711, 'epoch': 1} {'type': 'loss', 'content': 0.21699748933315277, 'timestamp': '2025-10-01 04:19:23.819774', 'step': 2712, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:23.872117', 'step': 2712, 'epoch': 1} {'type': 'loss', 'content': 0.22864891588687897, 'timestamp': '2025-10-01 04:19:23.874671', 'step': 2713, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:23.926577', 'step': 2713, 'epoch': 1} {'type': 'loss', 'content': 0.13250429928302765, 'timestamp': '2025-10-01 04:19:23.929283', 'step': 2714, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:23.983670', 'step': 2714, 'epoch': 1} {'type': 'loss', 'content': 0.18468314409255981, 'timestamp': '2025-10-01 04:19:23.986046', 'step': 2715, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:24.038693', 'step': 2715, 'epoch': 1} {'type': 'loss', 'content': 0.12047717720270157, 'timestamp': '2025-10-01 04:19:24.044931', 'step': 2716, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:24.099262', 'step': 2716, 'epoch': 1} {'type': 'loss', 'content': 0.13385312259197235, 'timestamp': '2025-10-01 04:19:24.101450', 'step': 2717, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:24.154972', 'step': 2717, 'epoch': 1} {'type': 'loss', 'content': 0.2020934671163559, 'timestamp': '2025-10-01 04:19:24.159077', 'step': 2718, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:24.216390', 'step': 2718, 'epoch': 1} {'type': 'loss', 'content': 0.13402844965457916, 'timestamp': '2025-10-01 04:19:24.218566', 'step': 2719, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:24.271300', 'step': 2719, 'epoch': 1} {'type': 'loss', 'content': 0.2960245907306671, 'timestamp': '2025-10-01 04:19:24.277011', 'step': 2720, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:19:24.329589', 'step': 2720, 'epoch': 1} {'type': 'loss', 'content': 0.22705020010471344, 'timestamp': '2025-10-01 04:19:24.333607', 'step': 2721, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:24.387614', 'step': 2721, 'epoch': 1} {'type': 'loss', 'content': 0.1635112762451172, 'timestamp': '2025-10-01 04:19:24.389826', 'step': 2722, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:24.442170', 'step': 2722, 'epoch': 1} {'type': 'loss', 'content': 0.19558481872081757, 'timestamp': '2025-10-01 04:19:24.444337', 'step': 2723, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:24.497494', 'step': 2723, 'epoch': 1} {'type': 'loss', 'content': 0.1993998885154724, 'timestamp': '2025-10-01 04:19:24.502990', 'step': 2724, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:24.554894', 'step': 2724, 'epoch': 1} {'type': 'loss', 'content': 0.2199496179819107, 'timestamp': '2025-10-01 04:19:24.557048', 'step': 2725, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:24.612439', 'step': 2725, 'epoch': 1} {'type': 'loss', 'content': 0.12331937998533249, 'timestamp': '2025-10-01 04:19:24.617003', 'step': 2726, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:24.671784', 'step': 2726, 'epoch': 1} {'type': 'loss', 'content': 0.1737133413553238, 'timestamp': '2025-10-01 04:19:24.675653', 'step': 2727, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:24.728067', 'step': 2727, 'epoch': 1} {'type': 'loss', 'content': 0.251715749502182, 'timestamp': '2025-10-01 04:19:24.733828', 'step': 2728, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:24.787596', 'step': 2728, 'epoch': 1} {'type': 'loss', 'content': 0.1505172848701477, 'timestamp': '2025-10-01 04:19:24.789799', 'step': 2729, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:24.842764', 'step': 2729, 'epoch': 1} {'type': 'loss', 'content': 0.1471284031867981, 'timestamp': '2025-10-01 04:19:24.846760', 'step': 2730, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:24.903220', 'step': 2730, 'epoch': 1} {'type': 'loss', 'content': 0.24554723501205444, 'timestamp': '2025-10-01 04:19:24.910374', 'step': 2731, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:24.971540', 'step': 2731, 'epoch': 1} {'type': 'loss', 'content': 0.1614300161600113, 'timestamp': '2025-10-01 04:19:24.979119', 'step': 2732, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:25.037236', 'step': 2732, 'epoch': 1} {'type': 'loss', 'content': 0.17178624868392944, 'timestamp': '2025-10-01 04:19:25.039580', 'step': 2733, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:25.093111', 'step': 2733, 'epoch': 1} {'type': 'loss', 'content': 0.19031073153018951, 'timestamp': '2025-10-01 04:19:25.095561', 'step': 2734, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:25.148455', 'step': 2734, 'epoch': 1} {'type': 'loss', 'content': 0.2028891146183014, 'timestamp': '2025-10-01 04:19:25.150534', 'step': 2735, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:25.205261', 'step': 2735, 'epoch': 1} {'type': 'loss', 'content': 0.18849529325962067, 'timestamp': '2025-10-01 04:19:25.210894', 'step': 2736, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:19:25.263000', 'step': 2736, 'epoch': 1} {'type': 'loss', 'content': 0.14616863429546356, 'timestamp': '2025-10-01 04:19:25.265159', 'step': 2737, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:25.317175', 'step': 2737, 'epoch': 1} {'type': 'loss', 'content': 0.20757178962230682, 'timestamp': '2025-10-01 04:19:25.320144', 'step': 2738, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:25.380001', 'step': 2738, 'epoch': 1} {'type': 'loss', 'content': 0.12409087270498276, 'timestamp': '2025-10-01 04:19:25.382208', 'step': 2739, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:25.435336', 'step': 2739, 'epoch': 1} {'type': 'loss', 'content': 0.18392214179039001, 'timestamp': '2025-10-01 04:19:25.440942', 'step': 2740, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:25.495417', 'step': 2740, 'epoch': 1} {'type': 'loss', 'content': 0.17675061523914337, 'timestamp': '2025-10-01 04:19:25.497448', 'step': 2741, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:25.550483', 'step': 2741, 'epoch': 1} {'type': 'loss', 'content': 0.11553175002336502, 'timestamp': '2025-10-01 04:19:25.552569', 'step': 2742, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:25.605386', 'step': 2742, 'epoch': 1} {'type': 'loss', 'content': 0.13493278622627258, 'timestamp': '2025-10-01 04:19:25.607658', 'step': 2743, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:19:25.662335', 'step': 2743, 'epoch': 1} {'type': 'loss', 'content': 0.1553545743227005, 'timestamp': '2025-10-01 04:19:25.668025', 'step': 2744, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:25.720533', 'step': 2744, 'epoch': 1} {'type': 'loss', 'content': 0.242412269115448, 'timestamp': '2025-10-01 04:19:25.724265', 'step': 2745, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:25.777615', 'step': 2745, 'epoch': 1} {'type': 'loss', 'content': 0.14499357342720032, 'timestamp': '2025-10-01 04:19:25.780184', 'step': 2746, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:25.832828', 'step': 2746, 'epoch': 1} {'type': 'loss', 'content': 0.1486140936613083, 'timestamp': '2025-10-01 04:19:25.841806', 'step': 2747, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:25.900036', 'step': 2747, 'epoch': 1} {'type': 'loss', 'content': 0.16266845166683197, 'timestamp': '2025-10-01 04:19:25.905955', 'step': 2748, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-01 04:19:25.958358', 'step': 2748, 'epoch': 1} {'type': 'loss', 'content': 0.20663760602474213, 'timestamp': '2025-10-01 04:19:25.960433', 'step': 2749, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:26.013669', 'step': 2749, 'epoch': 1} {'type': 'loss', 'content': 0.11697432398796082, 'timestamp': '2025-10-01 04:19:26.017555', 'step': 2750, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:26.071399', 'step': 2750, 'epoch': 1} {'type': 'loss', 'content': 0.23828144371509552, 'timestamp': '2025-10-01 04:19:26.073685', 'step': 2751, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:26.127203', 'step': 2751, 'epoch': 1} {'type': 'loss', 'content': 0.11393796652555466, 'timestamp': '2025-10-01 04:19:26.132906', 'step': 2752, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:19:26.184663', 'step': 2752, 'epoch': 1} {'type': 'loss', 'content': 0.22644270956516266, 'timestamp': '2025-10-01 04:19:26.186837', 'step': 2753, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:26.239055', 'step': 2753, 'epoch': 1} {'type': 'loss', 'content': 0.25982606410980225, 'timestamp': '2025-10-01 04:19:26.241095', 'step': 2754, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:26.293694', 'step': 2754, 'epoch': 1} {'type': 'loss', 'content': 0.21480967104434967, 'timestamp': '2025-10-01 04:19:26.296084', 'step': 2755, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:26.348658', 'step': 2755, 'epoch': 1} {'type': 'loss', 'content': 0.12387435883283615, 'timestamp': '2025-10-01 04:19:26.354620', 'step': 2756, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:26.406454', 'step': 2756, 'epoch': 1} {'type': 'loss', 'content': 0.1764443814754486, 'timestamp': '2025-10-01 04:19:26.409505', 'step': 2757, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:26.462482', 'step': 2757, 'epoch': 1} {'type': 'loss', 'content': 0.20834068953990936, 'timestamp': '2025-10-01 04:19:26.465311', 'step': 2758, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:26.518050', 'step': 2758, 'epoch': 1} {'type': 'loss', 'content': 0.15449100732803345, 'timestamp': '2025-10-01 04:19:26.520426', 'step': 2759, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:26.572768', 'step': 2759, 'epoch': 1} {'type': 'loss', 'content': 0.10479070246219635, 'timestamp': '2025-10-01 04:19:26.578469', 'step': 2760, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:26.630466', 'step': 2760, 'epoch': 1} {'type': 'loss', 'content': 0.1448591649532318, 'timestamp': '2025-10-01 04:19:26.634332', 'step': 2761, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:26.687079', 'step': 2761, 'epoch': 1} {'type': 'loss', 'content': 0.15881237387657166, 'timestamp': '2025-10-01 04:19:26.689879', 'step': 2762, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:19:26.742769', 'step': 2762, 'epoch': 1} {'type': 'loss', 'content': 0.19619226455688477, 'timestamp': '2025-10-01 04:19:26.744909', 'step': 2763, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:26.796916', 'step': 2763, 'epoch': 1} {'type': 'loss', 'content': 0.19428196549415588, 'timestamp': '2025-10-01 04:19:26.802731', 'step': 2764, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:26.855189', 'step': 2764, 'epoch': 1} {'type': 'loss', 'content': 0.16437512636184692, 'timestamp': '2025-10-01 04:19:26.857385', 'step': 2765, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:19:26.910189', 'step': 2765, 'epoch': 1} {'type': 'loss', 'content': 0.20399338006973267, 'timestamp': '2025-10-01 04:19:26.912295', 'step': 2766, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:19:26.965439', 'step': 2766, 'epoch': 1} {'type': 'loss', 'content': 0.18757086992263794, 'timestamp': '2025-10-01 04:19:26.969052', 'step': 2767, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:27.023068', 'step': 2767, 'epoch': 1} {'type': 'loss', 'content': 0.17842121422290802, 'timestamp': '2025-10-01 04:19:27.028631', 'step': 2768, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:27.080687', 'step': 2768, 'epoch': 1} {'type': 'loss', 'content': 0.14677934348583221, 'timestamp': '2025-10-01 04:19:27.082927', 'step': 2769, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:27.135727', 'step': 2769, 'epoch': 1} {'type': 'loss', 'content': 0.1981748640537262, 'timestamp': '2025-10-01 04:19:27.137772', 'step': 2770, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:27.190330', 'step': 2770, 'epoch': 1} {'type': 'loss', 'content': 0.1189347580075264, 'timestamp': '2025-10-01 04:19:27.192465', 'step': 2771, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:27.244989', 'step': 2771, 'epoch': 1} {'type': 'loss', 'content': 0.269469678401947, 'timestamp': '2025-10-01 04:19:27.253431', 'step': 2772, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:27.306707', 'step': 2772, 'epoch': 1} {'type': 'loss', 'content': 0.08240877836942673, 'timestamp': '2025-10-01 04:19:27.310189', 'step': 2773, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:27.362801', 'step': 2773, 'epoch': 1} {'type': 'loss', 'content': 0.22113044559955597, 'timestamp': '2025-10-01 04:19:27.365993', 'step': 2774, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:19:27.419021', 'step': 2774, 'epoch': 1} {'type': 'loss', 'content': 0.17615516483783722, 'timestamp': '2025-10-01 04:19:27.421006', 'step': 2775, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:27.474507', 'step': 2775, 'epoch': 1} {'type': 'loss', 'content': 0.18048013746738434, 'timestamp': '2025-10-01 04:19:27.480466', 'step': 2776, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:27.532992', 'step': 2776, 'epoch': 1} {'type': 'loss', 'content': 0.14349961280822754, 'timestamp': '2025-10-01 04:19:27.535821', 'step': 2777, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:27.591314', 'step': 2777, 'epoch': 1} {'type': 'loss', 'content': 0.13700082898139954, 'timestamp': '2025-10-01 04:19:27.593832', 'step': 2778, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:27.646627', 'step': 2778, 'epoch': 1} {'type': 'loss', 'content': 0.1183023452758789, 'timestamp': '2025-10-01 04:19:27.648786', 'step': 2779, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:27.701316', 'step': 2779, 'epoch': 1} {'type': 'loss', 'content': 0.2539822459220886, 'timestamp': '2025-10-01 04:19:27.706832', 'step': 2780, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:27.758899', 'step': 2780, 'epoch': 1} {'type': 'loss', 'content': 0.16135363280773163, 'timestamp': '2025-10-01 04:19:27.761020', 'step': 2781, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:27.813309', 'step': 2781, 'epoch': 1} {'type': 'loss', 'content': 0.1486651599407196, 'timestamp': '2025-10-01 04:19:27.815769', 'step': 2782, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:27.868765', 'step': 2782, 'epoch': 1} {'type': 'loss', 'content': 0.21390140056610107, 'timestamp': '2025-10-01 04:19:27.872461', 'step': 2783, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:27.924785', 'step': 2783, 'epoch': 1} {'type': 'loss', 'content': 0.23065705597400665, 'timestamp': '2025-10-01 04:19:27.930383', 'step': 2784, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:27.982880', 'step': 2784, 'epoch': 1} {'type': 'loss', 'content': 0.09202047437429428, 'timestamp': '2025-10-01 04:19:27.984978', 'step': 2785, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:28.037353', 'step': 2785, 'epoch': 1} {'type': 'loss', 'content': 0.25807949900627136, 'timestamp': '2025-10-01 04:19:28.039474', 'step': 2786, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:28.093360', 'step': 2786, 'epoch': 1} {'type': 'loss', 'content': 0.1920890212059021, 'timestamp': '2025-10-01 04:19:28.095448', 'step': 2787, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:28.147739', 'step': 2787, 'epoch': 1} {'type': 'loss', 'content': 0.18160833418369293, 'timestamp': '2025-10-01 04:19:28.154248', 'step': 2788, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:28.206247', 'step': 2788, 'epoch': 1} {'type': 'loss', 'content': 0.13997939229011536, 'timestamp': '2025-10-01 04:19:28.208949', 'step': 2789, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:28.262214', 'step': 2789, 'epoch': 1} {'type': 'loss', 'content': 0.21088813245296478, 'timestamp': '2025-10-01 04:19:28.264878', 'step': 2790, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:28.317810', 'step': 2790, 'epoch': 1} {'type': 'loss', 'content': 0.14427264034748077, 'timestamp': '2025-10-01 04:19:28.320190', 'step': 2791, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:28.372589', 'step': 2791, 'epoch': 1} {'type': 'loss', 'content': 0.3469521105289459, 'timestamp': '2025-10-01 04:19:28.378521', 'step': 2792, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:28.431075', 'step': 2792, 'epoch': 1} {'type': 'loss', 'content': 0.14835049211978912, 'timestamp': '2025-10-01 04:19:28.433166', 'step': 2793, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:28.486179', 'step': 2793, 'epoch': 1} {'type': 'loss', 'content': 0.1816733181476593, 'timestamp': '2025-10-01 04:19:28.488823', 'step': 2794, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:28.542905', 'step': 2794, 'epoch': 1} {'type': 'loss', 'content': 0.16449959576129913, 'timestamp': '2025-10-01 04:19:28.545159', 'step': 2795, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:28.599406', 'step': 2795, 'epoch': 1} {'type': 'loss', 'content': 0.15163291990756989, 'timestamp': '2025-10-01 04:19:28.605901', 'step': 2796, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-01 04:19:41.969470', 'step': 2796, 'epoch': 1} {'type': 'pplx', 'content': 10960.728335568954, 'timestamp': '2025-10-01 04:19:41.972275', 'step': 2796, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:42.024117', 'step': 2796, 'epoch': 1} {'type': 'loss', 'content': 0.2699272632598877, 'timestamp': '2025-10-01 04:19:42.026286', 'step': 2797, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:19:42.080459', 'step': 2797, 'epoch': 1} {'type': 'loss', 'content': 0.20302894711494446, 'timestamp': '2025-10-01 04:19:42.082569', 'step': 2798, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:42.136315', 'step': 2798, 'epoch': 1} {'type': 'loss', 'content': 0.11292636394500732, 'timestamp': '2025-10-01 04:19:42.138431', 'step': 2799, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:42.191425', 'step': 2799, 'epoch': 1} {'type': 'loss', 'content': 0.16112476587295532, 'timestamp': '2025-10-01 04:19:42.197912', 'step': 2800, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:19:42.250752', 'step': 2800, 'epoch': 1} {'type': 'loss', 'content': 0.11552289128303528, 'timestamp': '2025-10-01 04:19:42.252961', 'step': 2801, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:42.305947', 'step': 2801, 'epoch': 1} {'type': 'loss', 'content': 0.14482256770133972, 'timestamp': '2025-10-01 04:19:42.308077', 'step': 2802, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:19:42.370695', 'step': 2802, 'epoch': 1} {'type': 'loss', 'content': 0.12136031687259674, 'timestamp': '2025-10-01 04:19:42.372775', 'step': 2803, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:42.426679', 'step': 2803, 'epoch': 1} {'type': 'loss', 'content': 0.1842755824327469, 'timestamp': '2025-10-01 04:19:42.432554', 'step': 2804, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:42.485746', 'step': 2804, 'epoch': 1} {'type': 'loss', 'content': 0.17657926678657532, 'timestamp': '2025-10-01 04:19:42.487872', 'step': 2805, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:42.541542', 'step': 2805, 'epoch': 1} {'type': 'loss', 'content': 0.12576155364513397, 'timestamp': '2025-10-01 04:19:42.543719', 'step': 2806, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:42.597367', 'step': 2806, 'epoch': 1} {'type': 'loss', 'content': 0.2274499386548996, 'timestamp': '2025-10-01 04:19:42.599597', 'step': 2807, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:42.652310', 'step': 2807, 'epoch': 1} {'type': 'loss', 'content': 0.22042715549468994, 'timestamp': '2025-10-01 04:19:42.657900', 'step': 2808, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:42.714240', 'step': 2808, 'epoch': 1} {'type': 'loss', 'content': 0.43989628553390503, 'timestamp': '2025-10-01 04:19:42.716149', 'step': 2809, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:42.770116', 'step': 2809, 'epoch': 1} {'type': 'loss', 'content': 0.16749560832977295, 'timestamp': '2025-10-01 04:19:42.772287', 'step': 2810, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:42.827821', 'step': 2810, 'epoch': 1} {'type': 'loss', 'content': 0.1342882663011551, 'timestamp': '2025-10-01 04:19:42.830021', 'step': 2811, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:42.882757', 'step': 2811, 'epoch': 1} {'type': 'loss', 'content': 0.11265413463115692, 'timestamp': '2025-10-01 04:19:42.896518', 'step': 2812, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:19:42.949164', 'step': 2812, 'epoch': 1} {'type': 'loss', 'content': 0.1905301958322525, 'timestamp': '2025-10-01 04:19:42.951390', 'step': 2813, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:43.005189', 'step': 2813, 'epoch': 1} {'type': 'loss', 'content': 0.14605160057544708, 'timestamp': '2025-10-01 04:19:43.007473', 'step': 2814, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:43.061877', 'step': 2814, 'epoch': 1} {'type': 'loss', 'content': 0.2053062468767166, 'timestamp': '2025-10-01 04:19:43.065049', 'step': 2815, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:43.122338', 'step': 2815, 'epoch': 1} {'type': 'loss', 'content': 0.19227153062820435, 'timestamp': '2025-10-01 04:19:43.128365', 'step': 2816, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:43.180981', 'step': 2816, 'epoch': 1} {'type': 'loss', 'content': 0.134205162525177, 'timestamp': '2025-10-01 04:19:43.187239', 'step': 2817, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:43.246413', 'step': 2817, 'epoch': 1} {'type': 'loss', 'content': 0.1446218490600586, 'timestamp': '2025-10-01 04:19:43.248763', 'step': 2818, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:43.302006', 'step': 2818, 'epoch': 1} {'type': 'loss', 'content': 0.29361051321029663, 'timestamp': '2025-10-01 04:19:43.304490', 'step': 2819, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:43.362970', 'step': 2819, 'epoch': 1} {'type': 'loss', 'content': 0.19094057381153107, 'timestamp': '2025-10-01 04:19:43.369283', 'step': 2820, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:43.433212', 'step': 2820, 'epoch': 1} {'type': 'loss', 'content': 0.17164327204227448, 'timestamp': '2025-10-01 04:19:43.435497', 'step': 2821, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:43.489044', 'step': 2821, 'epoch': 1} {'type': 'loss', 'content': 0.26070550084114075, 'timestamp': '2025-10-01 04:19:43.491383', 'step': 2822, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:43.550231', 'step': 2822, 'epoch': 1} {'type': 'loss', 'content': 0.19480708241462708, 'timestamp': '2025-10-01 04:19:43.552469', 'step': 2823, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:43.606542', 'step': 2823, 'epoch': 1} {'type': 'loss', 'content': 0.09713587164878845, 'timestamp': '2025-10-01 04:19:43.620418', 'step': 2824, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:43.674683', 'step': 2824, 'epoch': 1} {'type': 'loss', 'content': 0.14777396619319916, 'timestamp': '2025-10-01 04:19:43.677589', 'step': 2825, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:43.732697', 'step': 2825, 'epoch': 1} {'type': 'loss', 'content': 0.17412765324115753, 'timestamp': '2025-10-01 04:19:43.734902', 'step': 2826, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:43.789451', 'step': 2826, 'epoch': 1} {'type': 'loss', 'content': 0.14351516962051392, 'timestamp': '2025-10-01 04:19:43.792468', 'step': 2827, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:43.846985', 'step': 2827, 'epoch': 1} {'type': 'loss', 'content': 0.09122191369533539, 'timestamp': '2025-10-01 04:19:43.853452', 'step': 2828, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:43.907201', 'step': 2828, 'epoch': 1} {'type': 'loss', 'content': 0.10469946265220642, 'timestamp': '2025-10-01 04:19:43.909636', 'step': 2829, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:43.963682', 'step': 2829, 'epoch': 1} {'type': 'loss', 'content': 0.2748245298862457, 'timestamp': '2025-10-01 04:19:43.965494', 'step': 2830, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:44.018376', 'step': 2830, 'epoch': 1} {'type': 'loss', 'content': 0.1911362260580063, 'timestamp': '2025-10-01 04:19:44.020594', 'step': 2831, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:44.073505', 'step': 2831, 'epoch': 1} {'type': 'loss', 'content': 0.17024442553520203, 'timestamp': '2025-10-01 04:19:44.079875', 'step': 2832, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:44.132460', 'step': 2832, 'epoch': 1} {'type': 'loss', 'content': 0.11332722753286362, 'timestamp': '2025-10-01 04:19:44.134729', 'step': 2833, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:44.187897', 'step': 2833, 'epoch': 1} {'type': 'loss', 'content': 0.1290387362241745, 'timestamp': '2025-10-01 04:19:44.190481', 'step': 2834, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:44.244700', 'step': 2834, 'epoch': 1} {'type': 'loss', 'content': 0.13055771589279175, 'timestamp': '2025-10-01 04:19:44.248308', 'step': 2835, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:44.301556', 'step': 2835, 'epoch': 1} {'type': 'loss', 'content': 0.1900930404663086, 'timestamp': '2025-10-01 04:19:44.311679', 'step': 2836, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:44.364336', 'step': 2836, 'epoch': 1} {'type': 'loss', 'content': 0.22071625292301178, 'timestamp': '2025-10-01 04:19:44.366552', 'step': 2837, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:44.421952', 'step': 2837, 'epoch': 1} {'type': 'loss', 'content': 0.19757163524627686, 'timestamp': '2025-10-01 04:19:44.425852', 'step': 2838, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:44.496605', 'step': 2838, 'epoch': 1} {'type': 'loss', 'content': 0.15764932334423065, 'timestamp': '2025-10-01 04:19:44.499037', 'step': 2839, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:44.553097', 'step': 2839, 'epoch': 1} {'type': 'loss', 'content': 0.18256916105747223, 'timestamp': '2025-10-01 04:19:44.559160', 'step': 2840, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:44.612521', 'step': 2840, 'epoch': 1} {'type': 'loss', 'content': 0.2881036698818207, 'timestamp': '2025-10-01 04:19:44.614878', 'step': 2841, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:44.669906', 'step': 2841, 'epoch': 1} {'type': 'loss', 'content': 0.1609533578157425, 'timestamp': '2025-10-01 04:19:44.672186', 'step': 2842, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:44.729186', 'step': 2842, 'epoch': 1} {'type': 'loss', 'content': 0.14409540593624115, 'timestamp': '2025-10-01 04:19:44.731852', 'step': 2843, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:44.791411', 'step': 2843, 'epoch': 1} {'type': 'loss', 'content': 0.15455695986747742, 'timestamp': '2025-10-01 04:19:44.798500', 'step': 2844, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:44.851055', 'step': 2844, 'epoch': 1} {'type': 'loss', 'content': 0.20647141337394714, 'timestamp': '2025-10-01 04:19:44.853418', 'step': 2845, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:44.907508', 'step': 2845, 'epoch': 1} {'type': 'loss', 'content': 0.1618354469537735, 'timestamp': '2025-10-01 04:19:44.909624', 'step': 2846, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:44.963946', 'step': 2846, 'epoch': 1} {'type': 'loss', 'content': 0.1689617931842804, 'timestamp': '2025-10-01 04:19:44.966157', 'step': 2847, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:45.020972', 'step': 2847, 'epoch': 1} {'type': 'loss', 'content': 0.13782261312007904, 'timestamp': '2025-10-01 04:19:45.027302', 'step': 2848, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:45.079992', 'step': 2848, 'epoch': 1} {'type': 'loss', 'content': 0.12277321517467499, 'timestamp': '2025-10-01 04:19:45.082146', 'step': 2849, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:45.150590', 'step': 2849, 'epoch': 1} {'type': 'loss', 'content': 0.1215720996260643, 'timestamp': '2025-10-01 04:19:45.152663', 'step': 2850, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:45.206314', 'step': 2850, 'epoch': 1} {'type': 'loss', 'content': 0.1499454230070114, 'timestamp': '2025-10-01 04:19:45.208711', 'step': 2851, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:45.264176', 'step': 2851, 'epoch': 1} {'type': 'loss', 'content': 0.14597181975841522, 'timestamp': '2025-10-01 04:19:45.272872', 'step': 2852, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:45.326882', 'step': 2852, 'epoch': 1} {'type': 'loss', 'content': 0.14082695543766022, 'timestamp': '2025-10-01 04:19:45.329284', 'step': 2853, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:45.382778', 'step': 2853, 'epoch': 1} {'type': 'loss', 'content': 0.1745947152376175, 'timestamp': '2025-10-01 04:19:45.385043', 'step': 2854, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:45.439412', 'step': 2854, 'epoch': 1} {'type': 'loss', 'content': 0.2188693732023239, 'timestamp': '2025-10-01 04:19:45.441797', 'step': 2855, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:45.496266', 'step': 2855, 'epoch': 1} {'type': 'loss', 'content': 0.16595324873924255, 'timestamp': '2025-10-01 04:19:45.502409', 'step': 2856, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:45.554940', 'step': 2856, 'epoch': 1} {'type': 'loss', 'content': 0.22887811064720154, 'timestamp': '2025-10-01 04:19:45.557774', 'step': 2857, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:45.621940', 'step': 2857, 'epoch': 1} {'type': 'loss', 'content': 0.13976189494132996, 'timestamp': '2025-10-01 04:19:45.624425', 'step': 2858, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:45.688442', 'step': 2858, 'epoch': 1} {'type': 'loss', 'content': 0.29755693674087524, 'timestamp': '2025-10-01 04:19:45.690832', 'step': 2859, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:45.743777', 'step': 2859, 'epoch': 1} {'type': 'loss', 'content': 0.19742731750011444, 'timestamp': '2025-10-01 04:19:45.749543', 'step': 2860, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:45.802426', 'step': 2860, 'epoch': 1} {'type': 'loss', 'content': 0.17033828794956207, 'timestamp': '2025-10-01 04:19:45.804846', 'step': 2861, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:45.857162', 'step': 2861, 'epoch': 1} {'type': 'loss', 'content': 0.19595441222190857, 'timestamp': '2025-10-01 04:19:45.861255', 'step': 2862, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:45.914238', 'step': 2862, 'epoch': 1} {'type': 'loss', 'content': 0.07208944857120514, 'timestamp': '2025-10-01 04:19:45.916396', 'step': 2863, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:45.969085', 'step': 2863, 'epoch': 1} {'type': 'loss', 'content': 0.1263398975133896, 'timestamp': '2025-10-01 04:19:45.974973', 'step': 2864, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:46.037654', 'step': 2864, 'epoch': 1} {'type': 'loss', 'content': 0.22931914031505585, 'timestamp': '2025-10-01 04:19:46.047133', 'step': 2865, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:46.099591', 'step': 2865, 'epoch': 1} {'type': 'loss', 'content': 0.2058931142091751, 'timestamp': '2025-10-01 04:19:46.102094', 'step': 2866, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:46.154855', 'step': 2866, 'epoch': 1} {'type': 'loss', 'content': 0.20491009950637817, 'timestamp': '2025-10-01 04:19:46.157486', 'step': 2867, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:46.212286', 'step': 2867, 'epoch': 1} {'type': 'loss', 'content': 0.15367209911346436, 'timestamp': '2025-10-01 04:19:46.218019', 'step': 2868, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:19:46.271789', 'step': 2868, 'epoch': 1} {'type': 'loss', 'content': 0.19607509672641754, 'timestamp': '2025-10-01 04:19:46.274782', 'step': 2869, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:46.327556', 'step': 2869, 'epoch': 1} {'type': 'loss', 'content': 0.13243848085403442, 'timestamp': '2025-10-01 04:19:46.330115', 'step': 2870, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:46.383173', 'step': 2870, 'epoch': 1} {'type': 'loss', 'content': 0.19174697995185852, 'timestamp': '2025-10-01 04:19:46.385564', 'step': 2871, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:46.439121', 'step': 2871, 'epoch': 1} {'type': 'loss', 'content': 0.1279241293668747, 'timestamp': '2025-10-01 04:19:46.445644', 'step': 2872, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:46.499850', 'step': 2872, 'epoch': 1} {'type': 'loss', 'content': 0.23408129811286926, 'timestamp': '2025-10-01 04:19:46.502220', 'step': 2873, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:46.558165', 'step': 2873, 'epoch': 1} {'type': 'loss', 'content': 0.08840201050043106, 'timestamp': '2025-10-01 04:19:46.560498', 'step': 2874, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:19:46.614754', 'step': 2874, 'epoch': 1} {'type': 'loss', 'content': 0.15442687273025513, 'timestamp': '2025-10-01 04:19:46.617431', 'step': 2875, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:46.671580', 'step': 2875, 'epoch': 1} {'type': 'loss', 'content': 0.20734046399593353, 'timestamp': '2025-10-01 04:19:46.677843', 'step': 2876, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:46.738012', 'step': 2876, 'epoch': 1} {'type': 'loss', 'content': 0.2299317866563797, 'timestamp': '2025-10-01 04:19:46.740614', 'step': 2877, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:46.794944', 'step': 2877, 'epoch': 1} {'type': 'loss', 'content': 0.1729373037815094, 'timestamp': '2025-10-01 04:19:46.797519', 'step': 2878, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:46.857136', 'step': 2878, 'epoch': 1} {'type': 'loss', 'content': 0.16194498538970947, 'timestamp': '2025-10-01 04:19:46.859983', 'step': 2879, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:46.913234', 'step': 2879, 'epoch': 1} {'type': 'loss', 'content': 0.1580791473388672, 'timestamp': '2025-10-01 04:19:46.920696', 'step': 2880, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:19:46.980004', 'step': 2880, 'epoch': 1} {'type': 'loss', 'content': 0.22739705443382263, 'timestamp': '2025-10-01 04:19:46.988551', 'step': 2881, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:47.042311', 'step': 2881, 'epoch': 1} {'type': 'loss', 'content': 0.20365740358829498, 'timestamp': '2025-10-01 04:19:47.045769', 'step': 2882, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:47.099418', 'step': 2882, 'epoch': 1} {'type': 'loss', 'content': 0.16503238677978516, 'timestamp': '2025-10-01 04:19:47.101692', 'step': 2883, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:47.155639', 'step': 2883, 'epoch': 1} {'type': 'loss', 'content': 0.24101661145687103, 'timestamp': '2025-10-01 04:19:47.163516', 'step': 2884, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:47.216710', 'step': 2884, 'epoch': 1} {'type': 'loss', 'content': 0.12350188195705414, 'timestamp': '2025-10-01 04:19:47.219441', 'step': 2885, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:47.272713', 'step': 2885, 'epoch': 1} {'type': 'loss', 'content': 0.09174300730228424, 'timestamp': '2025-10-01 04:19:47.282501', 'step': 2886, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:47.345772', 'step': 2886, 'epoch': 1} {'type': 'loss', 'content': 0.17011748254299164, 'timestamp': '2025-10-01 04:19:47.348638', 'step': 2887, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:47.403051', 'step': 2887, 'epoch': 1} {'type': 'loss', 'content': 0.12103523313999176, 'timestamp': '2025-10-01 04:19:47.409181', 'step': 2888, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:47.463651', 'step': 2888, 'epoch': 1} {'type': 'loss', 'content': 0.174605593085289, 'timestamp': '2025-10-01 04:19:47.466042', 'step': 2889, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:47.521374', 'step': 2889, 'epoch': 1} {'type': 'loss', 'content': 0.15097878873348236, 'timestamp': '2025-10-01 04:19:47.524143', 'step': 2890, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:47.577963', 'step': 2890, 'epoch': 1} {'type': 'loss', 'content': 0.11669223755598068, 'timestamp': '2025-10-01 04:19:47.580424', 'step': 2891, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:19:47.634055', 'step': 2891, 'epoch': 1} {'type': 'loss', 'content': 0.10780782997608185, 'timestamp': '2025-10-01 04:19:47.640276', 'step': 2892, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:47.700713', 'step': 2892, 'epoch': 1} {'type': 'loss', 'content': 0.15147235989570618, 'timestamp': '2025-10-01 04:19:47.715253', 'step': 2893, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:47.769223', 'step': 2893, 'epoch': 1} {'type': 'loss', 'content': 0.16170553863048553, 'timestamp': '2025-10-01 04:19:47.771569', 'step': 2894, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:47.825606', 'step': 2894, 'epoch': 1} {'type': 'loss', 'content': 0.12455518543720245, 'timestamp': '2025-10-01 04:19:47.829296', 'step': 2895, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:19:47.887966', 'step': 2895, 'epoch': 1} {'type': 'loss', 'content': 0.179289773106575, 'timestamp': '2025-10-01 04:19:47.895957', 'step': 2896, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:47.958493', 'step': 2896, 'epoch': 1} {'type': 'loss', 'content': 0.10021784156560898, 'timestamp': '2025-10-01 04:19:47.961061', 'step': 2897, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:48.015064', 'step': 2897, 'epoch': 1} {'type': 'loss', 'content': 0.18408598005771637, 'timestamp': '2025-10-01 04:19:48.024228', 'step': 2898, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:48.085969', 'step': 2898, 'epoch': 1} {'type': 'loss', 'content': 0.2228430062532425, 'timestamp': '2025-10-01 04:19:48.089042', 'step': 2899, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:48.143420', 'step': 2899, 'epoch': 1} {'type': 'loss', 'content': 0.1940428614616394, 'timestamp': '2025-10-01 04:19:48.149252', 'step': 2900, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:48.211517', 'step': 2900, 'epoch': 1} {'type': 'loss', 'content': 0.12359819561243057, 'timestamp': '2025-10-01 04:19:48.222377', 'step': 2901, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:48.278065', 'step': 2901, 'epoch': 1} {'type': 'loss', 'content': 0.15040196478366852, 'timestamp': '2025-10-01 04:19:48.280245', 'step': 2902, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:48.333037', 'step': 2902, 'epoch': 1} {'type': 'loss', 'content': 0.23113217949867249, 'timestamp': '2025-10-01 04:19:48.336148', 'step': 2903, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:48.389384', 'step': 2903, 'epoch': 1} {'type': 'loss', 'content': 0.09567341953516006, 'timestamp': '2025-10-01 04:19:48.395547', 'step': 2904, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:48.448424', 'step': 2904, 'epoch': 1} {'type': 'loss', 'content': 0.18611803650856018, 'timestamp': '2025-10-01 04:19:48.455885', 'step': 2905, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:48.508447', 'step': 2905, 'epoch': 1} {'type': 'loss', 'content': 0.1670754849910736, 'timestamp': '2025-10-01 04:19:48.510838', 'step': 2906, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:48.563696', 'step': 2906, 'epoch': 1} {'type': 'loss', 'content': 0.11747046560049057, 'timestamp': '2025-10-01 04:19:48.566158', 'step': 2907, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:48.618634', 'step': 2907, 'epoch': 1} {'type': 'loss', 'content': 0.20590820908546448, 'timestamp': '2025-10-01 04:19:48.624569', 'step': 2908, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:48.677318', 'step': 2908, 'epoch': 1} {'type': 'loss', 'content': 0.22622442245483398, 'timestamp': '2025-10-01 04:19:48.679774', 'step': 2909, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:48.735988', 'step': 2909, 'epoch': 1} {'type': 'loss', 'content': 0.09985098242759705, 'timestamp': '2025-10-01 04:19:48.738503', 'step': 2910, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:48.803999', 'step': 2910, 'epoch': 1} {'type': 'loss', 'content': 0.15019404888153076, 'timestamp': '2025-10-01 04:19:48.806383', 'step': 2911, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:48.859802', 'step': 2911, 'epoch': 1} {'type': 'loss', 'content': 0.2066923826932907, 'timestamp': '2025-10-01 04:19:48.865722', 'step': 2912, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:48.917907', 'step': 2912, 'epoch': 1} {'type': 'loss', 'content': 0.1645035296678543, 'timestamp': '2025-10-01 04:19:48.920536', 'step': 2913, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:48.981799', 'step': 2913, 'epoch': 1} {'type': 'loss', 'content': 0.1972610205411911, 'timestamp': '2025-10-01 04:19:48.984376', 'step': 2914, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:49.038053', 'step': 2914, 'epoch': 1} {'type': 'loss', 'content': 0.18206678330898285, 'timestamp': '2025-10-01 04:19:49.040340', 'step': 2915, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:49.093887', 'step': 2915, 'epoch': 1} {'type': 'loss', 'content': 0.16582617163658142, 'timestamp': '2025-10-01 04:19:49.099936', 'step': 2916, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:49.153048', 'step': 2916, 'epoch': 1} {'type': 'loss', 'content': 0.16669414937496185, 'timestamp': '2025-10-01 04:19:49.155598', 'step': 2917, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:49.209026', 'step': 2917, 'epoch': 1} {'type': 'loss', 'content': 0.20162519812583923, 'timestamp': '2025-10-01 04:19:49.211350', 'step': 2918, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:49.265299', 'step': 2918, 'epoch': 1} {'type': 'loss', 'content': 0.170632466673851, 'timestamp': '2025-10-01 04:19:49.267540', 'step': 2919, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:49.320764', 'step': 2919, 'epoch': 1} {'type': 'loss', 'content': 0.2087266445159912, 'timestamp': '2025-10-01 04:19:49.326605', 'step': 2920, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:49.379709', 'step': 2920, 'epoch': 1} {'type': 'loss', 'content': 0.17860333621501923, 'timestamp': '2025-10-01 04:19:49.381778', 'step': 2921, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:49.434614', 'step': 2921, 'epoch': 1} {'type': 'loss', 'content': 0.1584571748971939, 'timestamp': '2025-10-01 04:19:49.437074', 'step': 2922, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:49.490338', 'step': 2922, 'epoch': 1} {'type': 'loss', 'content': 0.22943539917469025, 'timestamp': '2025-10-01 04:19:49.492673', 'step': 2923, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:19:49.546017', 'step': 2923, 'epoch': 1} {'type': 'loss', 'content': 0.18062013387680054, 'timestamp': '2025-10-01 04:19:49.551678', 'step': 2924, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:49.605555', 'step': 2924, 'epoch': 1} {'type': 'loss', 'content': 0.2307368665933609, 'timestamp': '2025-10-01 04:19:49.608000', 'step': 2925, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:19:49.661720', 'step': 2925, 'epoch': 1} {'type': 'loss', 'content': 0.18723389506340027, 'timestamp': '2025-10-01 04:19:49.666156', 'step': 2926, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:49.726452', 'step': 2926, 'epoch': 1} {'type': 'loss', 'content': 0.11634496599435806, 'timestamp': '2025-10-01 04:19:49.728854', 'step': 2927, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:49.781676', 'step': 2927, 'epoch': 1} {'type': 'loss', 'content': 0.18309952318668365, 'timestamp': '2025-10-01 04:19:49.792856', 'step': 2928, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:49.845890', 'step': 2928, 'epoch': 1} {'type': 'loss', 'content': 0.23736387491226196, 'timestamp': '2025-10-01 04:19:49.849497', 'step': 2929, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:49.904128', 'step': 2929, 'epoch': 1} {'type': 'loss', 'content': 0.1769876331090927, 'timestamp': '2025-10-01 04:19:49.906439', 'step': 2930, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:49.960322', 'step': 2930, 'epoch': 1} {'type': 'loss', 'content': 0.15648864209651947, 'timestamp': '2025-10-01 04:19:49.964079', 'step': 2931, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:50.019995', 'step': 2931, 'epoch': 1} {'type': 'loss', 'content': 0.21406857669353485, 'timestamp': '2025-10-01 04:19:50.026762', 'step': 2932, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:50.080058', 'step': 2932, 'epoch': 1} {'type': 'loss', 'content': 0.1797512322664261, 'timestamp': '2025-10-01 04:19:50.082415', 'step': 2933, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:50.135626', 'step': 2933, 'epoch': 1} {'type': 'loss', 'content': 0.2546573281288147, 'timestamp': '2025-10-01 04:19:50.138437', 'step': 2934, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:50.191358', 'step': 2934, 'epoch': 1} {'type': 'loss', 'content': 0.1283567100763321, 'timestamp': '2025-10-01 04:19:50.193702', 'step': 2935, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:19:50.247022', 'step': 2935, 'epoch': 1} {'type': 'loss', 'content': 0.238368958234787, 'timestamp': '2025-10-01 04:19:50.253076', 'step': 2936, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:50.306556', 'step': 2936, 'epoch': 1} {'type': 'loss', 'content': 0.18519356846809387, 'timestamp': '2025-10-01 04:19:50.308785', 'step': 2937, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:50.361607', 'step': 2937, 'epoch': 1} {'type': 'loss', 'content': 0.24593040347099304, 'timestamp': '2025-10-01 04:19:50.363502', 'step': 2938, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:50.416097', 'step': 2938, 'epoch': 1} {'type': 'loss', 'content': 0.1826428771018982, 'timestamp': '2025-10-01 04:19:50.418330', 'step': 2939, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:50.471970', 'step': 2939, 'epoch': 1} {'type': 'loss', 'content': 0.2577509880065918, 'timestamp': '2025-10-01 04:19:50.482751', 'step': 2940, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:50.535065', 'step': 2940, 'epoch': 1} {'type': 'loss', 'content': 0.19074079394340515, 'timestamp': '2025-10-01 04:19:50.551600', 'step': 2941, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:50.604743', 'step': 2941, 'epoch': 1} {'type': 'loss', 'content': 0.12881715595722198, 'timestamp': '2025-10-01 04:19:50.606918', 'step': 2942, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:50.671255', 'step': 2942, 'epoch': 1} {'type': 'loss', 'content': 0.1464344561100006, 'timestamp': '2025-10-01 04:19:50.673552', 'step': 2943, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:50.727016', 'step': 2943, 'epoch': 1} {'type': 'loss', 'content': 0.09231050312519073, 'timestamp': '2025-10-01 04:19:50.737538', 'step': 2944, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:50.791647', 'step': 2944, 'epoch': 1} {'type': 'loss', 'content': 0.15126095712184906, 'timestamp': '2025-10-01 04:19:50.794052', 'step': 2945, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:50.847419', 'step': 2945, 'epoch': 1} {'type': 'loss', 'content': 0.15376313030719757, 'timestamp': '2025-10-01 04:19:50.849643', 'step': 2946, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:50.903132', 'step': 2946, 'epoch': 1} {'type': 'loss', 'content': 0.17410233616828918, 'timestamp': '2025-10-01 04:19:50.905188', 'step': 2947, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:50.964197', 'step': 2947, 'epoch': 1} {'type': 'loss', 'content': 0.13157007098197937, 'timestamp': '2025-10-01 04:19:50.970038', 'step': 2948, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:51.022920', 'step': 2948, 'epoch': 1} {'type': 'loss', 'content': 0.10062982141971588, 'timestamp': '2025-10-01 04:19:51.025548', 'step': 2949, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:51.078771', 'step': 2949, 'epoch': 1} {'type': 'loss', 'content': 0.1460953950881958, 'timestamp': '2025-10-01 04:19:51.081018', 'step': 2950, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:51.134770', 'step': 2950, 'epoch': 1} {'type': 'loss', 'content': 0.1860996037721634, 'timestamp': '2025-10-01 04:19:51.137018', 'step': 2951, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:51.201710', 'step': 2951, 'epoch': 1} {'type': 'loss', 'content': 0.22672060132026672, 'timestamp': '2025-10-01 04:19:51.207478', 'step': 2952, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:51.260150', 'step': 2952, 'epoch': 1} {'type': 'loss', 'content': 0.173719123005867, 'timestamp': '2025-10-01 04:19:51.262619', 'step': 2953, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:51.321789', 'step': 2953, 'epoch': 1} {'type': 'loss', 'content': 0.16557370126247406, 'timestamp': '2025-10-01 04:19:51.324285', 'step': 2954, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:51.377667', 'step': 2954, 'epoch': 1} {'type': 'loss', 'content': 0.1623653918504715, 'timestamp': '2025-10-01 04:19:51.379879', 'step': 2955, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:51.432636', 'step': 2955, 'epoch': 1} {'type': 'loss', 'content': 0.10536235570907593, 'timestamp': '2025-10-01 04:19:51.439629', 'step': 2956, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:51.496476', 'step': 2956, 'epoch': 1} {'type': 'loss', 'content': 0.17857734858989716, 'timestamp': '2025-10-01 04:19:51.498539', 'step': 2957, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:51.551391', 'step': 2957, 'epoch': 1} {'type': 'loss', 'content': 0.24705328047275543, 'timestamp': '2025-10-01 04:19:51.553544', 'step': 2958, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:51.606577', 'step': 2958, 'epoch': 1} {'type': 'loss', 'content': 0.2032073438167572, 'timestamp': '2025-10-01 04:19:51.608877', 'step': 2959, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:51.662300', 'step': 2959, 'epoch': 1} {'type': 'loss', 'content': 0.17462387681007385, 'timestamp': '2025-10-01 04:19:51.668127', 'step': 2960, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:51.720377', 'step': 2960, 'epoch': 1} {'type': 'loss', 'content': 0.20173916220664978, 'timestamp': '2025-10-01 04:19:51.722698', 'step': 2961, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:51.775894', 'step': 2961, 'epoch': 1} {'type': 'loss', 'content': 0.17067134380340576, 'timestamp': '2025-10-01 04:19:51.777997', 'step': 2962, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:51.831154', 'step': 2962, 'epoch': 1} {'type': 'loss', 'content': 0.15771006047725677, 'timestamp': '2025-10-01 04:19:51.833346', 'step': 2963, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:51.886199', 'step': 2963, 'epoch': 1} {'type': 'loss', 'content': 0.14929574728012085, 'timestamp': '2025-10-01 04:19:51.892141', 'step': 2964, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:51.944652', 'step': 2964, 'epoch': 1} {'type': 'loss', 'content': 0.20123328268527985, 'timestamp': '2025-10-01 04:19:51.947170', 'step': 2965, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:52.008445', 'step': 2965, 'epoch': 1} {'type': 'loss', 'content': 0.23210129141807556, 'timestamp': '2025-10-01 04:19:52.011247', 'step': 2966, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:52.065270', 'step': 2966, 'epoch': 1} {'type': 'loss', 'content': 0.17326945066452026, 'timestamp': '2025-10-01 04:19:52.067696', 'step': 2967, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:52.120473', 'step': 2967, 'epoch': 1} {'type': 'loss', 'content': 0.10623034834861755, 'timestamp': '2025-10-01 04:19:52.126553', 'step': 2968, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:52.178994', 'step': 2968, 'epoch': 1} {'type': 'loss', 'content': 0.15393395721912384, 'timestamp': '2025-10-01 04:19:52.181187', 'step': 2969, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:52.233890', 'step': 2969, 'epoch': 1} {'type': 'loss', 'content': 0.14133402705192566, 'timestamp': '2025-10-01 04:19:52.236131', 'step': 2970, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:52.289151', 'step': 2970, 'epoch': 1} {'type': 'loss', 'content': 0.22620370984077454, 'timestamp': '2025-10-01 04:19:52.291414', 'step': 2971, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:52.344758', 'step': 2971, 'epoch': 1} {'type': 'loss', 'content': 0.20142121613025665, 'timestamp': '2025-10-01 04:19:52.350412', 'step': 2972, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:52.403050', 'step': 2972, 'epoch': 1} {'type': 'loss', 'content': 0.14864343404769897, 'timestamp': '2025-10-01 04:19:52.405206', 'step': 2973, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:19:52.458583', 'step': 2973, 'epoch': 1} {'type': 'loss', 'content': 0.14202512800693512, 'timestamp': '2025-10-01 04:19:52.460985', 'step': 2974, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:52.514410', 'step': 2974, 'epoch': 1} {'type': 'loss', 'content': 0.1751742660999298, 'timestamp': '2025-10-01 04:19:52.516601', 'step': 2975, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:52.569109', 'step': 2975, 'epoch': 1} {'type': 'loss', 'content': 0.14406262338161469, 'timestamp': '2025-10-01 04:19:52.574825', 'step': 2976, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-01 04:19:52.628822', 'step': 2976, 'epoch': 1} {'type': 'loss', 'content': 0.1672266721725464, 'timestamp': '2025-10-01 04:19:52.630865', 'step': 2977, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:52.683954', 'step': 2977, 'epoch': 1} {'type': 'loss', 'content': 0.14769606292247772, 'timestamp': '2025-10-01 04:19:52.685833', 'step': 2978, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:52.738791', 'step': 2978, 'epoch': 1} {'type': 'loss', 'content': 0.10516577214002609, 'timestamp': '2025-10-01 04:19:52.740874', 'step': 2979, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:52.798504', 'step': 2979, 'epoch': 1} {'type': 'loss', 'content': 0.1860605925321579, 'timestamp': '2025-10-01 04:19:52.804249', 'step': 2980, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:52.856663', 'step': 2980, 'epoch': 1} {'type': 'loss', 'content': 0.12971019744873047, 'timestamp': '2025-10-01 04:19:52.858662', 'step': 2981, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:52.911186', 'step': 2981, 'epoch': 1} {'type': 'loss', 'content': 0.19194217026233673, 'timestamp': '2025-10-01 04:19:52.913362', 'step': 2982, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:52.975428', 'step': 2982, 'epoch': 1} {'type': 'loss', 'content': 0.17026440799236298, 'timestamp': '2025-10-01 04:19:52.977473', 'step': 2983, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:53.036401', 'step': 2983, 'epoch': 1} {'type': 'loss', 'content': 0.27543905377388, 'timestamp': '2025-10-01 04:19:53.042144', 'step': 2984, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:53.094638', 'step': 2984, 'epoch': 1} {'type': 'loss', 'content': 0.10971308499574661, 'timestamp': '2025-10-01 04:19:53.097473', 'step': 2985, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:19:53.159332', 'step': 2985, 'epoch': 1} {'type': 'loss', 'content': 0.18565185368061066, 'timestamp': '2025-10-01 04:19:53.161545', 'step': 2986, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:53.220275', 'step': 2986, 'epoch': 1} {'type': 'loss', 'content': 0.09681662917137146, 'timestamp': '2025-10-01 04:19:53.222571', 'step': 2987, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:19:53.287957', 'step': 2987, 'epoch': 1} {'type': 'loss', 'content': 0.24353380501270294, 'timestamp': '2025-10-01 04:19:53.293895', 'step': 2988, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:53.345946', 'step': 2988, 'epoch': 1} {'type': 'loss', 'content': 0.23232030868530273, 'timestamp': '2025-10-01 04:19:53.347940', 'step': 2989, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:53.407236', 'step': 2989, 'epoch': 1} {'type': 'loss', 'content': 0.16866160929203033, 'timestamp': '2025-10-01 04:19:53.409479', 'step': 2990, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:53.461862', 'step': 2990, 'epoch': 1} {'type': 'loss', 'content': 0.13919207453727722, 'timestamp': '2025-10-01 04:19:53.467575', 'step': 2991, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:53.520539', 'step': 2991, 'epoch': 1} {'type': 'loss', 'content': 0.23218581080436707, 'timestamp': '2025-10-01 04:19:53.526426', 'step': 2992, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:53.578803', 'step': 2992, 'epoch': 1} {'type': 'loss', 'content': 0.2885834872722626, 'timestamp': '2025-10-01 04:19:53.580900', 'step': 2993, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:53.633456', 'step': 2993, 'epoch': 1} {'type': 'loss', 'content': 0.23482176661491394, 'timestamp': '2025-10-01 04:19:53.635565', 'step': 2994, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:53.689058', 'step': 2994, 'epoch': 1} {'type': 'loss', 'content': 0.23990699648857117, 'timestamp': '2025-10-01 04:19:53.691582', 'step': 2995, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:53.744612', 'step': 2995, 'epoch': 1} {'type': 'loss', 'content': 0.1589481383562088, 'timestamp': '2025-10-01 04:19:53.752674', 'step': 2996, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:53.805168', 'step': 2996, 'epoch': 1} {'type': 'loss', 'content': 0.18329322338104248, 'timestamp': '2025-10-01 04:19:53.812544', 'step': 2997, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:53.865221', 'step': 2997, 'epoch': 1} {'type': 'loss', 'content': 0.1396206021308899, 'timestamp': '2025-10-01 04:19:53.867668', 'step': 2998, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:19:53.921616', 'step': 2998, 'epoch': 1} {'type': 'loss', 'content': 0.12758171558380127, 'timestamp': '2025-10-01 04:19:53.924075', 'step': 2999, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:53.978762', 'step': 2999, 'epoch': 1} {'type': 'loss', 'content': 0.13934078812599182, 'timestamp': '2025-10-01 04:19:53.984711', 'step': 3000, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 3000', 'timestamp': '2025-10-01 04:19:54.359458', 'step': 3000, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:54.413108', 'step': 3000, 'epoch': 1} {'type': 'loss', 'content': 0.1235739216208458, 'timestamp': '2025-10-01 04:19:54.415469', 'step': 3001, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:54.469431', 'step': 3001, 'epoch': 1} {'type': 'loss', 'content': 0.17299576103687286, 'timestamp': '2025-10-01 04:19:54.472078', 'step': 3002, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:54.526288', 'step': 3002, 'epoch': 1} {'type': 'loss', 'content': 0.14361992478370667, 'timestamp': '2025-10-01 04:19:54.528825', 'step': 3003, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:54.582491', 'step': 3003, 'epoch': 1} {'type': 'loss', 'content': 0.15138287842273712, 'timestamp': '2025-10-01 04:19:54.590701', 'step': 3004, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:54.642771', 'step': 3004, 'epoch': 1} {'type': 'loss', 'content': 0.09158226102590561, 'timestamp': '2025-10-01 04:19:54.648651', 'step': 3005, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:54.701359', 'step': 3005, 'epoch': 1} {'type': 'loss', 'content': 0.17362992465496063, 'timestamp': '2025-10-01 04:19:54.703650', 'step': 3006, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:54.757101', 'step': 3006, 'epoch': 1} {'type': 'loss', 'content': 0.20978327095508575, 'timestamp': '2025-10-01 04:19:54.759363', 'step': 3007, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:54.813036', 'step': 3007, 'epoch': 1} {'type': 'loss', 'content': 0.08229692280292511, 'timestamp': '2025-10-01 04:19:54.818566', 'step': 3008, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:54.875484', 'step': 3008, 'epoch': 1} {'type': 'loss', 'content': 0.19966790080070496, 'timestamp': '2025-10-01 04:19:54.877810', 'step': 3009, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:19:54.931164', 'step': 3009, 'epoch': 1} {'type': 'loss', 'content': 0.28164270520210266, 'timestamp': '2025-10-01 04:19:54.934555', 'step': 3010, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:54.990132', 'step': 3010, 'epoch': 1} {'type': 'loss', 'content': 0.14766864478588104, 'timestamp': '2025-10-01 04:19:54.992822', 'step': 3011, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:55.046232', 'step': 3011, 'epoch': 1} {'type': 'loss', 'content': 0.08967500180006027, 'timestamp': '2025-10-01 04:19:55.051875', 'step': 3012, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:55.105334', 'step': 3012, 'epoch': 1} {'type': 'loss', 'content': 0.15390752255916595, 'timestamp': '2025-10-01 04:19:55.107379', 'step': 3013, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:55.160103', 'step': 3013, 'epoch': 1} {'type': 'loss', 'content': 0.12490402907133102, 'timestamp': '2025-10-01 04:19:55.162322', 'step': 3014, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:55.215178', 'step': 3014, 'epoch': 1} {'type': 'loss', 'content': 0.15274515748023987, 'timestamp': '2025-10-01 04:19:55.217535', 'step': 3015, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:55.270898', 'step': 3015, 'epoch': 1} {'type': 'loss', 'content': 0.22062064707279205, 'timestamp': '2025-10-01 04:19:55.278421', 'step': 3016, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:55.336324', 'step': 3016, 'epoch': 1} {'type': 'loss', 'content': 0.20301197469234467, 'timestamp': '2025-10-01 04:19:55.338701', 'step': 3017, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:55.391772', 'step': 3017, 'epoch': 1} {'type': 'loss', 'content': 0.22078680992126465, 'timestamp': '2025-10-01 04:19:55.394327', 'step': 3018, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:55.458294', 'step': 3018, 'epoch': 1} {'type': 'loss', 'content': 0.10663797706365585, 'timestamp': '2025-10-01 04:19:55.466288', 'step': 3019, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:55.518963', 'step': 3019, 'epoch': 1} {'type': 'loss', 'content': 0.16801531612873077, 'timestamp': '2025-10-01 04:19:55.524792', 'step': 3020, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:55.578154', 'step': 3020, 'epoch': 1} {'type': 'loss', 'content': 0.14136478304862976, 'timestamp': '2025-10-01 04:19:55.580111', 'step': 3021, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:55.632955', 'step': 3021, 'epoch': 1} {'type': 'loss', 'content': 0.12782658636569977, 'timestamp': '2025-10-01 04:19:55.635437', 'step': 3022, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:55.688480', 'step': 3022, 'epoch': 1} {'type': 'loss', 'content': 0.2176172286272049, 'timestamp': '2025-10-01 04:19:55.691284', 'step': 3023, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:55.744567', 'step': 3023, 'epoch': 1} {'type': 'loss', 'content': 0.18667072057724, 'timestamp': '2025-10-01 04:19:55.750492', 'step': 3024, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:55.803896', 'step': 3024, 'epoch': 1} {'type': 'loss', 'content': 0.1713266223669052, 'timestamp': '2025-10-01 04:19:55.806066', 'step': 3025, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:55.859270', 'step': 3025, 'epoch': 1} {'type': 'loss', 'content': 0.09219412505626678, 'timestamp': '2025-10-01 04:19:55.861428', 'step': 3026, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:55.916986', 'step': 3026, 'epoch': 1} {'type': 'loss', 'content': 0.2137012481689453, 'timestamp': '2025-10-01 04:19:55.919132', 'step': 3027, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:55.978422', 'step': 3027, 'epoch': 1} {'type': 'loss', 'content': 0.2255227416753769, 'timestamp': '2025-10-01 04:19:55.984492', 'step': 3028, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:56.052269', 'step': 3028, 'epoch': 1} {'type': 'loss', 'content': 0.12746073305606842, 'timestamp': '2025-10-01 04:19:56.054638', 'step': 3029, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:56.119077', 'step': 3029, 'epoch': 1} {'type': 'loss', 'content': 0.18449251353740692, 'timestamp': '2025-10-01 04:19:56.128885', 'step': 3030, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:56.188917', 'step': 3030, 'epoch': 1} {'type': 'loss', 'content': 0.15318934619426727, 'timestamp': '2025-10-01 04:19:56.191359', 'step': 3031, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:56.248171', 'step': 3031, 'epoch': 1} {'type': 'loss', 'content': 0.17817527055740356, 'timestamp': '2025-10-01 04:19:56.253916', 'step': 3032, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:56.311326', 'step': 3032, 'epoch': 1} {'type': 'loss', 'content': 0.18937985599040985, 'timestamp': '2025-10-01 04:19:56.318667', 'step': 3033, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:56.371672', 'step': 3033, 'epoch': 1} {'type': 'loss', 'content': 0.1863178312778473, 'timestamp': '2025-10-01 04:19:56.373977', 'step': 3034, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:56.431542', 'step': 3034, 'epoch': 1} {'type': 'loss', 'content': 0.14490237832069397, 'timestamp': '2025-10-01 04:19:56.433770', 'step': 3035, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:56.486619', 'step': 3035, 'epoch': 1} {'type': 'loss', 'content': 0.1789778769016266, 'timestamp': '2025-10-01 04:19:56.492438', 'step': 3036, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:56.544550', 'step': 3036, 'epoch': 1} {'type': 'loss', 'content': 0.15178988873958588, 'timestamp': '2025-10-01 04:19:56.557596', 'step': 3037, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:56.610738', 'step': 3037, 'epoch': 1} {'type': 'loss', 'content': 0.22806796431541443, 'timestamp': '2025-10-01 04:19:56.614384', 'step': 3038, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:56.668887', 'step': 3038, 'epoch': 1} {'type': 'loss', 'content': 0.13906604051589966, 'timestamp': '2025-10-01 04:19:56.671077', 'step': 3039, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:19:56.724067', 'step': 3039, 'epoch': 1} {'type': 'loss', 'content': 0.23181433975696564, 'timestamp': '2025-10-01 04:19:56.729945', 'step': 3040, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:56.782430', 'step': 3040, 'epoch': 1} {'type': 'loss', 'content': 0.10200781375169754, 'timestamp': '2025-10-01 04:19:56.784938', 'step': 3041, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:56.837616', 'step': 3041, 'epoch': 1} {'type': 'loss', 'content': 0.15899643301963806, 'timestamp': '2025-10-01 04:19:56.839876', 'step': 3042, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:56.897130', 'step': 3042, 'epoch': 1} {'type': 'loss', 'content': 0.22101226449012756, 'timestamp': '2025-10-01 04:19:56.899364', 'step': 3043, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:56.952277', 'step': 3043, 'epoch': 1} {'type': 'loss', 'content': 0.22279402613639832, 'timestamp': '2025-10-01 04:19:56.957941', 'step': 3044, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:57.010546', 'step': 3044, 'epoch': 1} {'type': 'loss', 'content': 0.27582111954689026, 'timestamp': '2025-10-01 04:19:57.012724', 'step': 3045, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:57.071521', 'step': 3045, 'epoch': 1} {'type': 'loss', 'content': 0.12109970301389694, 'timestamp': '2025-10-01 04:19:57.073842', 'step': 3046, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:57.131993', 'step': 3046, 'epoch': 1} {'type': 'loss', 'content': 0.24565580487251282, 'timestamp': '2025-10-01 04:19:57.140768', 'step': 3047, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:19:57.193356', 'step': 3047, 'epoch': 1} {'type': 'loss', 'content': 0.1311815232038498, 'timestamp': '2025-10-01 04:19:57.199024', 'step': 3048, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:57.263106', 'step': 3048, 'epoch': 1} {'type': 'loss', 'content': 0.15009760856628418, 'timestamp': '2025-10-01 04:19:57.265280', 'step': 3049, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:57.317876', 'step': 3049, 'epoch': 1} {'type': 'loss', 'content': 0.1136123389005661, 'timestamp': '2025-10-01 04:19:57.320177', 'step': 3050, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:57.373123', 'step': 3050, 'epoch': 1} {'type': 'loss', 'content': 0.18981458246707916, 'timestamp': '2025-10-01 04:19:57.375350', 'step': 3051, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:57.428241', 'step': 3051, 'epoch': 1} {'type': 'loss', 'content': 0.16610215604305267, 'timestamp': '2025-10-01 04:19:57.433966', 'step': 3052, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:57.486245', 'step': 3052, 'epoch': 1} {'type': 'loss', 'content': 0.21406111121177673, 'timestamp': '2025-10-01 04:19:57.488318', 'step': 3053, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:57.540390', 'step': 3053, 'epoch': 1} {'type': 'loss', 'content': 0.20674104988574982, 'timestamp': '2025-10-01 04:19:57.542805', 'step': 3054, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:57.595586', 'step': 3054, 'epoch': 1} {'type': 'loss', 'content': 0.11842457205057144, 'timestamp': '2025-10-01 04:19:57.597780', 'step': 3055, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:57.650164', 'step': 3055, 'epoch': 1} {'type': 'loss', 'content': 0.20892736315727234, 'timestamp': '2025-10-01 04:19:57.655900', 'step': 3056, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:57.708301', 'step': 3056, 'epoch': 1} {'type': 'loss', 'content': 0.09834498167037964, 'timestamp': '2025-10-01 04:19:57.710665', 'step': 3057, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:57.763561', 'step': 3057, 'epoch': 1} {'type': 'loss', 'content': 0.2851499021053314, 'timestamp': '2025-10-01 04:19:57.766273', 'step': 3058, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:57.821384', 'step': 3058, 'epoch': 1} {'type': 'loss', 'content': 0.14299269020557404, 'timestamp': '2025-10-01 04:19:57.824682', 'step': 3059, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:57.877704', 'step': 3059, 'epoch': 1} {'type': 'loss', 'content': 0.2709820866584778, 'timestamp': '2025-10-01 04:19:57.884344', 'step': 3060, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:57.937133', 'step': 3060, 'epoch': 1} {'type': 'loss', 'content': 0.09389539062976837, 'timestamp': '2025-10-01 04:19:57.939410', 'step': 3061, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:57.992160', 'step': 3061, 'epoch': 1} {'type': 'loss', 'content': 0.12404938042163849, 'timestamp': '2025-10-01 04:19:57.994434', 'step': 3062, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:58.047789', 'step': 3062, 'epoch': 1} {'type': 'loss', 'content': 0.2697664499282837, 'timestamp': '2025-10-01 04:19:58.050020', 'step': 3063, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:58.103133', 'step': 3063, 'epoch': 1} {'type': 'loss', 'content': 0.14389443397521973, 'timestamp': '2025-10-01 04:19:58.108892', 'step': 3064, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:58.161947', 'step': 3064, 'epoch': 1} {'type': 'loss', 'content': 0.1865745633840561, 'timestamp': '2025-10-01 04:19:58.164143', 'step': 3065, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:58.217021', 'step': 3065, 'epoch': 1} {'type': 'loss', 'content': 0.0963912084698677, 'timestamp': '2025-10-01 04:19:58.223704', 'step': 3066, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:58.276144', 'step': 3066, 'epoch': 1} {'type': 'loss', 'content': 0.16054093837738037, 'timestamp': '2025-10-01 04:19:58.278352', 'step': 3067, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:58.331241', 'step': 3067, 'epoch': 1} {'type': 'loss', 'content': 0.1333998292684555, 'timestamp': '2025-10-01 04:19:58.336952', 'step': 3068, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:58.389736', 'step': 3068, 'epoch': 1} {'type': 'loss', 'content': 0.24884916841983795, 'timestamp': '2025-10-01 04:19:58.391911', 'step': 3069, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:58.445324', 'step': 3069, 'epoch': 1} {'type': 'loss', 'content': 0.23746874928474426, 'timestamp': '2025-10-01 04:19:58.447588', 'step': 3070, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:58.501792', 'step': 3070, 'epoch': 1} {'type': 'loss', 'content': 0.21841324865818024, 'timestamp': '2025-10-01 04:19:58.504082', 'step': 3071, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:58.557496', 'step': 3071, 'epoch': 1} {'type': 'loss', 'content': 0.2508799433708191, 'timestamp': '2025-10-01 04:19:58.563500', 'step': 3072, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:58.618701', 'step': 3072, 'epoch': 1} {'type': 'loss', 'content': 0.23503398895263672, 'timestamp': '2025-10-01 04:19:58.621055', 'step': 3073, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:58.675814', 'step': 3073, 'epoch': 1} {'type': 'loss', 'content': 0.12842179834842682, 'timestamp': '2025-10-01 04:19:58.678148', 'step': 3074, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:58.732039', 'step': 3074, 'epoch': 1} {'type': 'loss', 'content': 0.1559346318244934, 'timestamp': '2025-10-01 04:19:58.734538', 'step': 3075, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:58.788526', 'step': 3075, 'epoch': 1} {'type': 'loss', 'content': 0.10314523428678513, 'timestamp': '2025-10-01 04:19:58.794814', 'step': 3076, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:58.848540', 'step': 3076, 'epoch': 1} {'type': 'loss', 'content': 0.21036970615386963, 'timestamp': '2025-10-01 04:19:58.850734', 'step': 3077, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:58.904988', 'step': 3077, 'epoch': 1} {'type': 'loss', 'content': 0.1607164442539215, 'timestamp': '2025-10-01 04:19:58.907219', 'step': 3078, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:58.961999', 'step': 3078, 'epoch': 1} {'type': 'loss', 'content': 0.13513603806495667, 'timestamp': '2025-10-01 04:19:58.964241', 'step': 3079, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:59.019315', 'step': 3079, 'epoch': 1} {'type': 'loss', 'content': 0.2515545189380646, 'timestamp': '2025-10-01 04:19:59.025650', 'step': 3080, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:59.079371', 'step': 3080, 'epoch': 1} {'type': 'loss', 'content': 0.2041921764612198, 'timestamp': '2025-10-01 04:19:59.081591', 'step': 3081, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:59.136039', 'step': 3081, 'epoch': 1} {'type': 'loss', 'content': 0.2483200579881668, 'timestamp': '2025-10-01 04:19:59.138256', 'step': 3082, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:59.208232', 'step': 3082, 'epoch': 1} {'type': 'loss', 'content': 0.16211466491222382, 'timestamp': '2025-10-01 04:19:59.211348', 'step': 3083, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:59.265201', 'step': 3083, 'epoch': 1} {'type': 'loss', 'content': 0.1828502118587494, 'timestamp': '2025-10-01 04:19:59.271348', 'step': 3084, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:19:59.324281', 'step': 3084, 'epoch': 1} {'type': 'loss', 'content': 0.07510673254728317, 'timestamp': '2025-10-01 04:19:59.326575', 'step': 3085, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:59.381139', 'step': 3085, 'epoch': 1} {'type': 'loss', 'content': 0.18631039559841156, 'timestamp': '2025-10-01 04:19:59.384093', 'step': 3086, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:59.437463', 'step': 3086, 'epoch': 1} {'type': 'loss', 'content': 0.1625693440437317, 'timestamp': '2025-10-01 04:19:59.439784', 'step': 3087, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:19:59.494548', 'step': 3087, 'epoch': 1} {'type': 'loss', 'content': 0.22693681716918945, 'timestamp': '2025-10-01 04:19:59.500822', 'step': 3088, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:59.553434', 'step': 3088, 'epoch': 1} {'type': 'loss', 'content': 0.2642441689968109, 'timestamp': '2025-10-01 04:19:59.555805', 'step': 3089, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:59.608774', 'step': 3089, 'epoch': 1} {'type': 'loss', 'content': 0.11375955492258072, 'timestamp': '2025-10-01 04:19:59.612083', 'step': 3090, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:59.665344', 'step': 3090, 'epoch': 1} {'type': 'loss', 'content': 0.2087486833333969, 'timestamp': '2025-10-01 04:19:59.667574', 'step': 3091, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:19:59.720933', 'step': 3091, 'epoch': 1} {'type': 'loss', 'content': 0.16544070839881897, 'timestamp': '2025-10-01 04:19:59.727070', 'step': 3092, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:59.780772', 'step': 3092, 'epoch': 1} {'type': 'loss', 'content': 0.20969556272029877, 'timestamp': '2025-10-01 04:19:59.784067', 'step': 3093, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:19:59.842166', 'step': 3093, 'epoch': 1} {'type': 'loss', 'content': 0.12080389261245728, 'timestamp': '2025-10-01 04:19:59.845805', 'step': 3094, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:59.906095', 'step': 3094, 'epoch': 1} {'type': 'loss', 'content': 0.14170177280902863, 'timestamp': '2025-10-01 04:19:59.908438', 'step': 3095, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:19:59.961863', 'step': 3095, 'epoch': 1} {'type': 'loss', 'content': 0.18318063020706177, 'timestamp': '2025-10-01 04:19:59.968013', 'step': 3096, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:00.022457', 'step': 3096, 'epoch': 1} {'type': 'loss', 'content': 0.09930591285228729, 'timestamp': '2025-10-01 04:20:00.024820', 'step': 3097, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:00.077614', 'step': 3097, 'epoch': 1} {'type': 'loss', 'content': 0.24404698610305786, 'timestamp': '2025-10-01 04:20:00.079890', 'step': 3098, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:00.133794', 'step': 3098, 'epoch': 1} {'type': 'loss', 'content': 0.1415247768163681, 'timestamp': '2025-10-01 04:20:00.137403', 'step': 3099, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:00.194854', 'step': 3099, 'epoch': 1} {'type': 'loss', 'content': 0.21066232025623322, 'timestamp': '2025-10-01 04:20:00.202516', 'step': 3100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:00.262626', 'step': 3100, 'epoch': 1} {'type': 'loss', 'content': 0.22044149041175842, 'timestamp': '2025-10-01 04:20:00.265352', 'step': 3101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:00.326113', 'step': 3101, 'epoch': 1} {'type': 'loss', 'content': 0.10520666837692261, 'timestamp': '2025-10-01 04:20:00.328491', 'step': 3102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:00.387240', 'step': 3102, 'epoch': 1} {'type': 'loss', 'content': 0.19381050765514374, 'timestamp': '2025-10-01 04:20:00.389840', 'step': 3103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:00.447689', 'step': 3103, 'epoch': 1} {'type': 'loss', 'content': 0.10337504744529724, 'timestamp': '2025-10-01 04:20:00.454851', 'step': 3104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:00.512205', 'step': 3104, 'epoch': 1} {'type': 'loss', 'content': 0.24477538466453552, 'timestamp': '2025-10-01 04:20:00.514619', 'step': 3105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:00.579951', 'step': 3105, 'epoch': 1} {'type': 'loss', 'content': 0.17213031649589539, 'timestamp': '2025-10-01 04:20:00.582535', 'step': 3106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:00.653601', 'step': 3106, 'epoch': 1} {'type': 'loss', 'content': 0.11188145726919174, 'timestamp': '2025-10-01 04:20:00.656551', 'step': 3107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:00.714105', 'step': 3107, 'epoch': 1} {'type': 'loss', 'content': 0.11544226109981537, 'timestamp': '2025-10-01 04:20:00.721154', 'step': 3108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:00.780578', 'step': 3108, 'epoch': 1} {'type': 'loss', 'content': 0.15035872161388397, 'timestamp': '2025-10-01 04:20:00.782991', 'step': 3109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:00.839802', 'step': 3109, 'epoch': 1} {'type': 'loss', 'content': 0.18103599548339844, 'timestamp': '2025-10-01 04:20:00.842123', 'step': 3110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:00.897907', 'step': 3110, 'epoch': 1} {'type': 'loss', 'content': 0.11311017721891403, 'timestamp': '2025-10-01 04:20:00.904870', 'step': 3111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:00.974997', 'step': 3111, 'epoch': 1} {'type': 'loss', 'content': 0.20940177142620087, 'timestamp': '2025-10-01 04:20:00.981312', 'step': 3112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:01.034402', 'step': 3112, 'epoch': 1} {'type': 'loss', 'content': 0.13688182830810547, 'timestamp': '2025-10-01 04:20:01.036587', 'step': 3113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:01.090091', 'step': 3113, 'epoch': 1} {'type': 'loss', 'content': 0.1958344727754593, 'timestamp': '2025-10-01 04:20:01.092889', 'step': 3114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:01.148125', 'step': 3114, 'epoch': 1} {'type': 'loss', 'content': 0.15316754579544067, 'timestamp': '2025-10-01 04:20:01.153720', 'step': 3115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:20:01.233712', 'step': 3115, 'epoch': 1} {'type': 'loss', 'content': 0.29533106088638306, 'timestamp': '2025-10-01 04:20:01.246787', 'step': 3116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:01.302747', 'step': 3116, 'epoch': 1} {'type': 'loss', 'content': 0.17119254171848297, 'timestamp': '2025-10-01 04:20:01.316385', 'step': 3117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:01.371894', 'step': 3117, 'epoch': 1} {'type': 'loss', 'content': 0.15141509473323822, 'timestamp': '2025-10-01 04:20:01.380479', 'step': 3118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:01.444785', 'step': 3118, 'epoch': 1} {'type': 'loss', 'content': 0.1454126089811325, 'timestamp': '2025-10-01 04:20:01.447417', 'step': 3119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:01.523503', 'step': 3119, 'epoch': 1} {'type': 'loss', 'content': 0.1482582837343216, 'timestamp': '2025-10-01 04:20:01.531864', 'step': 3120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:01.605277', 'step': 3120, 'epoch': 1} {'type': 'loss', 'content': 0.1385468989610672, 'timestamp': '2025-10-01 04:20:01.607678', 'step': 3121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:01.670897', 'step': 3121, 'epoch': 1} {'type': 'loss', 'content': 0.19370369613170624, 'timestamp': '2025-10-01 04:20:01.675909', 'step': 3122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:01.732146', 'step': 3122, 'epoch': 1} {'type': 'loss', 'content': 0.13946785032749176, 'timestamp': '2025-10-01 04:20:01.741584', 'step': 3123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:01.800530', 'step': 3123, 'epoch': 1} {'type': 'loss', 'content': 0.14665642380714417, 'timestamp': '2025-10-01 04:20:01.816491', 'step': 3124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:01.891905', 'step': 3124, 'epoch': 1} {'type': 'loss', 'content': 0.14463581144809723, 'timestamp': '2025-10-01 04:20:01.897264', 'step': 3125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:01.951148', 'step': 3125, 'epoch': 1} {'type': 'loss', 'content': 0.17538979649543762, 'timestamp': '2025-10-01 04:20:01.953769', 'step': 3126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:20:02.037486', 'step': 3126, 'epoch': 1} {'type': 'loss', 'content': 0.182223379611969, 'timestamp': '2025-10-01 04:20:02.040837', 'step': 3127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:20:02.094886', 'step': 3127, 'epoch': 1} {'type': 'loss', 'content': 0.13865329325199127, 'timestamp': '2025-10-01 04:20:02.112590', 'step': 3128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:02.171583', 'step': 3128, 'epoch': 1} {'type': 'loss', 'content': 0.14911003410816193, 'timestamp': '2025-10-01 04:20:02.176795', 'step': 3129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:02.237147', 'step': 3129, 'epoch': 1} {'type': 'loss', 'content': 0.3143301010131836, 'timestamp': '2025-10-01 04:20:02.239775', 'step': 3130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:02.294219', 'step': 3130, 'epoch': 1} {'type': 'loss', 'content': 0.12753009796142578, 'timestamp': '2025-10-01 04:20:02.297428', 'step': 3131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:02.356107', 'step': 3131, 'epoch': 1} {'type': 'loss', 'content': 0.15253324806690216, 'timestamp': '2025-10-01 04:20:02.370119', 'step': 3132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:02.427932', 'step': 3132, 'epoch': 1} {'type': 'loss', 'content': 0.20523105561733246, 'timestamp': '2025-10-01 04:20:02.430533', 'step': 3133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:02.491733', 'step': 3133, 'epoch': 1} {'type': 'loss', 'content': 0.14513878524303436, 'timestamp': '2025-10-01 04:20:02.494665', 'step': 3134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:02.548385', 'step': 3134, 'epoch': 1} {'type': 'loss', 'content': 0.19302423298358917, 'timestamp': '2025-10-01 04:20:02.550794', 'step': 3135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:02.603054', 'step': 3135, 'epoch': 1} {'type': 'loss', 'content': 0.18681053817272186, 'timestamp': '2025-10-01 04:20:02.608872', 'step': 3136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:02.662000', 'step': 3136, 'epoch': 1} {'type': 'loss', 'content': 0.15195615589618683, 'timestamp': '2025-10-01 04:20:02.664864', 'step': 3137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:02.718101', 'step': 3137, 'epoch': 1} {'type': 'loss', 'content': 0.11381351947784424, 'timestamp': '2025-10-01 04:20:02.720683', 'step': 3138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:02.774127', 'step': 3138, 'epoch': 1} {'type': 'loss', 'content': 0.12625935673713684, 'timestamp': '2025-10-01 04:20:02.776677', 'step': 3139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:02.830884', 'step': 3139, 'epoch': 1} {'type': 'loss', 'content': 0.21857230365276337, 'timestamp': '2025-10-01 04:20:02.836846', 'step': 3140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:20:02.891149', 'step': 3140, 'epoch': 1} {'type': 'loss', 'content': 0.2234424352645874, 'timestamp': '2025-10-01 04:20:02.893381', 'step': 3141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:02.946382', 'step': 3141, 'epoch': 1} {'type': 'loss', 'content': 0.1358402520418167, 'timestamp': '2025-10-01 04:20:02.948484', 'step': 3142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:03.001344', 'step': 3142, 'epoch': 1} {'type': 'loss', 'content': 0.33850783109664917, 'timestamp': '2025-10-01 04:20:03.003969', 'step': 3143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:03.061473', 'step': 3143, 'epoch': 1} {'type': 'loss', 'content': 0.20494073629379272, 'timestamp': '2025-10-01 04:20:03.067111', 'step': 3144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:03.119372', 'step': 3144, 'epoch': 1} {'type': 'loss', 'content': 0.17512644827365875, 'timestamp': '2025-10-01 04:20:03.122012', 'step': 3145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:03.175319', 'step': 3145, 'epoch': 1} {'type': 'loss', 'content': 0.22103922069072723, 'timestamp': '2025-10-01 04:20:03.178018', 'step': 3146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:03.231276', 'step': 3146, 'epoch': 1} {'type': 'loss', 'content': 0.17535777390003204, 'timestamp': '2025-10-01 04:20:03.233715', 'step': 3147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:03.299440', 'step': 3147, 'epoch': 1} {'type': 'loss', 'content': 0.27940744161605835, 'timestamp': '2025-10-01 04:20:03.316500', 'step': 3148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:03.368597', 'step': 3148, 'epoch': 1} {'type': 'loss', 'content': 0.19393303990364075, 'timestamp': '2025-10-01 04:20:03.370803', 'step': 3149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:03.423725', 'step': 3149, 'epoch': 1} {'type': 'loss', 'content': 0.18720747530460358, 'timestamp': '2025-10-01 04:20:03.426134', 'step': 3150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:03.481428', 'step': 3150, 'epoch': 1} {'type': 'loss', 'content': 0.3117455244064331, 'timestamp': '2025-10-01 04:20:03.483846', 'step': 3151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:03.536984', 'step': 3151, 'epoch': 1} {'type': 'loss', 'content': 0.15073823928833008, 'timestamp': '2025-10-01 04:20:03.542685', 'step': 3152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:03.597398', 'step': 3152, 'epoch': 1} {'type': 'loss', 'content': 0.16089476644992828, 'timestamp': '2025-10-01 04:20:03.599470', 'step': 3153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:03.653322', 'step': 3153, 'epoch': 1} {'type': 'loss', 'content': 0.1262865960597992, 'timestamp': '2025-10-01 04:20:03.655736', 'step': 3154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:03.713415', 'step': 3154, 'epoch': 1} {'type': 'loss', 'content': 0.1772773563861847, 'timestamp': '2025-10-01 04:20:03.716772', 'step': 3155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:03.783193', 'step': 3155, 'epoch': 1} {'type': 'loss', 'content': 0.21564824879169464, 'timestamp': '2025-10-01 04:20:03.788905', 'step': 3156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:03.841366', 'step': 3156, 'epoch': 1} {'type': 'loss', 'content': 0.21938784420490265, 'timestamp': '2025-10-01 04:20:03.843836', 'step': 3157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:03.896932', 'step': 3157, 'epoch': 1} {'type': 'loss', 'content': 0.2417747676372528, 'timestamp': '2025-10-01 04:20:03.899146', 'step': 3158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:03.953026', 'step': 3158, 'epoch': 1} {'type': 'loss', 'content': 0.13777399063110352, 'timestamp': '2025-10-01 04:20:03.955497', 'step': 3159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:04.008243', 'step': 3159, 'epoch': 1} {'type': 'loss', 'content': 0.1903107911348343, 'timestamp': '2025-10-01 04:20:04.014284', 'step': 3160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:20:04.067754', 'step': 3160, 'epoch': 1} {'type': 'loss', 'content': 0.1783137321472168, 'timestamp': '2025-10-01 04:20:04.074585', 'step': 3161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:04.128390', 'step': 3161, 'epoch': 1} {'type': 'loss', 'content': 0.14341990649700165, 'timestamp': '2025-10-01 04:20:04.130636', 'step': 3162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:04.183960', 'step': 3162, 'epoch': 1} {'type': 'loss', 'content': 0.24980004131793976, 'timestamp': '2025-10-01 04:20:04.186206', 'step': 3163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:04.240016', 'step': 3163, 'epoch': 1} {'type': 'loss', 'content': 0.24734966456890106, 'timestamp': '2025-10-01 04:20:04.256262', 'step': 3164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:04.318405', 'step': 3164, 'epoch': 1} {'type': 'loss', 'content': 0.2980973422527313, 'timestamp': '2025-10-01 04:20:04.330983', 'step': 3165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:04.407395', 'step': 3165, 'epoch': 1} {'type': 'loss', 'content': 0.21270626783370972, 'timestamp': '2025-10-01 04:20:04.414077', 'step': 3166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:04.475285', 'step': 3166, 'epoch': 1} {'type': 'loss', 'content': 0.21300853788852692, 'timestamp': '2025-10-01 04:20:04.485537', 'step': 3167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:04.548943', 'step': 3167, 'epoch': 1} {'type': 'loss', 'content': 0.21338950097560883, 'timestamp': '2025-10-01 04:20:04.554949', 'step': 3168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:04.612906', 'step': 3168, 'epoch': 1} {'type': 'loss', 'content': 0.09735991805791855, 'timestamp': '2025-10-01 04:20:04.617054', 'step': 3169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:04.672356', 'step': 3169, 'epoch': 1} {'type': 'loss', 'content': 0.12355703860521317, 'timestamp': '2025-10-01 04:20:04.676407', 'step': 3170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:04.732877', 'step': 3170, 'epoch': 1} {'type': 'loss', 'content': 0.16544051468372345, 'timestamp': '2025-10-01 04:20:04.735055', 'step': 3171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:04.797917', 'step': 3171, 'epoch': 1} {'type': 'loss', 'content': 0.2253597527742386, 'timestamp': '2025-10-01 04:20:04.803718', 'step': 3172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:04.859043', 'step': 3172, 'epoch': 1} {'type': 'loss', 'content': 0.1761334091424942, 'timestamp': '2025-10-01 04:20:04.861096', 'step': 3173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:04.918984', 'step': 3173, 'epoch': 1} {'type': 'loss', 'content': 0.16569092869758606, 'timestamp': '2025-10-01 04:20:04.921272', 'step': 3174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:20:04.975411', 'step': 3174, 'epoch': 1} {'type': 'loss', 'content': 0.17958121001720428, 'timestamp': '2025-10-01 04:20:04.977881', 'step': 3175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:05.036099', 'step': 3175, 'epoch': 1} {'type': 'loss', 'content': 0.24133335053920746, 'timestamp': '2025-10-01 04:20:05.044443', 'step': 3176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:05.097634', 'step': 3176, 'epoch': 1} {'type': 'loss', 'content': 0.18794329464435577, 'timestamp': '2025-10-01 04:20:05.100028', 'step': 3177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:05.153471', 'step': 3177, 'epoch': 1} {'type': 'loss', 'content': 0.22108690440654755, 'timestamp': '2025-10-01 04:20:05.156250', 'step': 3178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:05.213054', 'step': 3178, 'epoch': 1} {'type': 'loss', 'content': 0.17937463521957397, 'timestamp': '2025-10-01 04:20:05.215456', 'step': 3179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:05.281131', 'step': 3179, 'epoch': 1} {'type': 'loss', 'content': 0.16257479786872864, 'timestamp': '2025-10-01 04:20:05.287376', 'step': 3180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:05.349028', 'step': 3180, 'epoch': 1} {'type': 'loss', 'content': 0.16744263470172882, 'timestamp': '2025-10-01 04:20:05.351777', 'step': 3181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:05.408280', 'step': 3181, 'epoch': 1} {'type': 'loss', 'content': 0.14956016838550568, 'timestamp': '2025-10-01 04:20:05.410762', 'step': 3182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:05.464646', 'step': 3182, 'epoch': 1} {'type': 'loss', 'content': 0.1837443709373474, 'timestamp': '2025-10-01 04:20:05.466957', 'step': 3183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:05.527679', 'step': 3183, 'epoch': 1} {'type': 'loss', 'content': 0.18033988773822784, 'timestamp': '2025-10-01 04:20:05.534303', 'step': 3184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:05.593926', 'step': 3184, 'epoch': 1} {'type': 'loss', 'content': 0.27087947726249695, 'timestamp': '2025-10-01 04:20:05.596436', 'step': 3185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:05.662619', 'step': 3185, 'epoch': 1} {'type': 'loss', 'content': 0.21741293370723724, 'timestamp': '2025-10-01 04:20:05.665015', 'step': 3186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:05.718468', 'step': 3186, 'epoch': 1} {'type': 'loss', 'content': 0.1710144579410553, 'timestamp': '2025-10-01 04:20:05.720929', 'step': 3187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:05.789320', 'step': 3187, 'epoch': 1} {'type': 'loss', 'content': 0.2353702336549759, 'timestamp': '2025-10-01 04:20:05.795041', 'step': 3188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:05.860718', 'step': 3188, 'epoch': 1} {'type': 'loss', 'content': 0.1720271110534668, 'timestamp': '2025-10-01 04:20:05.863015', 'step': 3189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:05.915616', 'step': 3189, 'epoch': 1} {'type': 'loss', 'content': 0.12951235473155975, 'timestamp': '2025-10-01 04:20:05.917888', 'step': 3190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:05.980505', 'step': 3190, 'epoch': 1} {'type': 'loss', 'content': 0.1399252712726593, 'timestamp': '2025-10-01 04:20:05.987900', 'step': 3191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:06.046095', 'step': 3191, 'epoch': 1} {'type': 'loss', 'content': 0.14283230900764465, 'timestamp': '2025-10-01 04:20:06.059536', 'step': 3192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:06.118109', 'step': 3192, 'epoch': 1} {'type': 'loss', 'content': 0.11874537914991379, 'timestamp': '2025-10-01 04:20:06.120270', 'step': 3193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:20:06.172991', 'step': 3193, 'epoch': 1} {'type': 'loss', 'content': 0.20878614485263824, 'timestamp': '2025-10-01 04:20:06.175214', 'step': 3194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:06.228305', 'step': 3194, 'epoch': 1} {'type': 'loss', 'content': 0.1386120468378067, 'timestamp': '2025-10-01 04:20:06.231234', 'step': 3195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:06.290054', 'step': 3195, 'epoch': 1} {'type': 'loss', 'content': 0.13011814653873444, 'timestamp': '2025-10-01 04:20:06.300714', 'step': 3196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:06.352711', 'step': 3196, 'epoch': 1} {'type': 'loss', 'content': 0.10432472079992294, 'timestamp': '2025-10-01 04:20:06.354955', 'step': 3197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:20:06.407763', 'step': 3197, 'epoch': 1} {'type': 'loss', 'content': 0.10125438868999481, 'timestamp': '2025-10-01 04:20:06.410087', 'step': 3198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:06.463462', 'step': 3198, 'epoch': 1} {'type': 'loss', 'content': 0.21592235565185547, 'timestamp': '2025-10-01 04:20:06.465822', 'step': 3199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:06.519317', 'step': 3199, 'epoch': 1} {'type': 'loss', 'content': 0.13964997231960297, 'timestamp': '2025-10-01 04:20:06.525214', 'step': 3200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:06.578140', 'step': 3200, 'epoch': 1} {'type': 'loss', 'content': 0.20677095651626587, 'timestamp': '2025-10-01 04:20:06.580556', 'step': 3201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:06.638050', 'step': 3201, 'epoch': 1} {'type': 'loss', 'content': 0.2621479630470276, 'timestamp': '2025-10-01 04:20:06.640219', 'step': 3202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:06.693310', 'step': 3202, 'epoch': 1} {'type': 'loss', 'content': 0.18187153339385986, 'timestamp': '2025-10-01 04:20:06.695552', 'step': 3203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:06.749362', 'step': 3203, 'epoch': 1} {'type': 'loss', 'content': 0.19630001485347748, 'timestamp': '2025-10-01 04:20:06.761353', 'step': 3204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:06.813939', 'step': 3204, 'epoch': 1} {'type': 'loss', 'content': 0.2310233861207962, 'timestamp': '2025-10-01 04:20:06.817279', 'step': 3205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:06.870450', 'step': 3205, 'epoch': 1} {'type': 'loss', 'content': 0.16530556976795197, 'timestamp': '2025-10-01 04:20:06.878121', 'step': 3206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:20:06.932354', 'step': 3206, 'epoch': 1} {'type': 'loss', 'content': 0.13588522374629974, 'timestamp': '2025-10-01 04:20:06.934733', 'step': 3207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:06.987617', 'step': 3207, 'epoch': 1} {'type': 'loss', 'content': 0.14363273978233337, 'timestamp': '2025-10-01 04:20:06.993405', 'step': 3208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:07.045937', 'step': 3208, 'epoch': 1} {'type': 'loss', 'content': 0.09722388535737991, 'timestamp': '2025-10-01 04:20:07.048172', 'step': 3209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:07.100992', 'step': 3209, 'epoch': 1} {'type': 'loss', 'content': 0.2446296364068985, 'timestamp': '2025-10-01 04:20:07.103352', 'step': 3210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:07.156425', 'step': 3210, 'epoch': 1} {'type': 'loss', 'content': 0.20984043180942535, 'timestamp': '2025-10-01 04:20:07.158666', 'step': 3211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:07.211123', 'step': 3211, 'epoch': 1} {'type': 'loss', 'content': 0.1506354659795761, 'timestamp': '2025-10-01 04:20:07.217694', 'step': 3212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:07.270164', 'step': 3212, 'epoch': 1} {'type': 'loss', 'content': 0.16467589139938354, 'timestamp': '2025-10-01 04:20:07.272475', 'step': 3213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:07.325687', 'step': 3213, 'epoch': 1} {'type': 'loss', 'content': 0.15494871139526367, 'timestamp': '2025-10-01 04:20:07.328208', 'step': 3214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:07.381566', 'step': 3214, 'epoch': 1} {'type': 'loss', 'content': 0.2098039984703064, 'timestamp': '2025-10-01 04:20:07.383773', 'step': 3215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:07.436641', 'step': 3215, 'epoch': 1} {'type': 'loss', 'content': 0.1465793401002884, 'timestamp': '2025-10-01 04:20:07.445900', 'step': 3216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:07.508641', 'step': 3216, 'epoch': 1} {'type': 'loss', 'content': 0.24167703092098236, 'timestamp': '2025-10-01 04:20:07.511015', 'step': 3217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:07.568090', 'step': 3217, 'epoch': 1} {'type': 'loss', 'content': 0.15697789192199707, 'timestamp': '2025-10-01 04:20:07.571089', 'step': 3218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:07.624088', 'step': 3218, 'epoch': 1} {'type': 'loss', 'content': 0.16713055968284607, 'timestamp': '2025-10-01 04:20:07.626520', 'step': 3219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:07.689668', 'step': 3219, 'epoch': 1} {'type': 'loss', 'content': 0.1311204582452774, 'timestamp': '2025-10-01 04:20:07.700901', 'step': 3220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:07.754997', 'step': 3220, 'epoch': 1} {'type': 'loss', 'content': 0.19832561910152435, 'timestamp': '2025-10-01 04:20:07.757326', 'step': 3221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:07.811168', 'step': 3221, 'epoch': 1} {'type': 'loss', 'content': 0.1696079522371292, 'timestamp': '2025-10-01 04:20:07.813535', 'step': 3222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:07.868948', 'step': 3222, 'epoch': 1} {'type': 'loss', 'content': 0.26032063364982605, 'timestamp': '2025-10-01 04:20:07.871164', 'step': 3223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:20:07.924024', 'step': 3223, 'epoch': 1} {'type': 'loss', 'content': 0.14330850541591644, 'timestamp': '2025-10-01 04:20:07.930607', 'step': 3224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:07.983330', 'step': 3224, 'epoch': 1} {'type': 'loss', 'content': 0.1703433394432068, 'timestamp': '2025-10-01 04:20:07.985457', 'step': 3225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:08.038392', 'step': 3225, 'epoch': 1} {'type': 'loss', 'content': 0.1556633710861206, 'timestamp': '2025-10-01 04:20:08.040962', 'step': 3226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:08.094308', 'step': 3226, 'epoch': 1} {'type': 'loss', 'content': 0.18565154075622559, 'timestamp': '2025-10-01 04:20:08.096769', 'step': 3227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:08.151883', 'step': 3227, 'epoch': 1} {'type': 'loss', 'content': 0.2861303389072418, 'timestamp': '2025-10-01 04:20:08.158210', 'step': 3228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:08.214291', 'step': 3228, 'epoch': 1} {'type': 'loss', 'content': 0.1031051054596901, 'timestamp': '2025-10-01 04:20:08.216673', 'step': 3229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:08.270258', 'step': 3229, 'epoch': 1} {'type': 'loss', 'content': 0.17449112236499786, 'timestamp': '2025-10-01 04:20:08.272411', 'step': 3230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:20:08.326073', 'step': 3230, 'epoch': 1} {'type': 'loss', 'content': 0.19793708622455597, 'timestamp': '2025-10-01 04:20:08.328468', 'step': 3231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:08.381906', 'step': 3231, 'epoch': 1} {'type': 'loss', 'content': 0.21591338515281677, 'timestamp': '2025-10-01 04:20:08.387894', 'step': 3232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:20:08.440614', 'step': 3232, 'epoch': 1} {'type': 'loss', 'content': 0.10688602924346924, 'timestamp': '2025-10-01 04:20:08.443100', 'step': 3233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:08.496522', 'step': 3233, 'epoch': 1} {'type': 'loss', 'content': 0.11126108467578888, 'timestamp': '2025-10-01 04:20:08.511907', 'step': 3234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:20:08.564972', 'step': 3234, 'epoch': 1} {'type': 'loss', 'content': 0.23085565865039825, 'timestamp': '2025-10-01 04:20:08.567736', 'step': 3235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:08.621258', 'step': 3235, 'epoch': 1} {'type': 'loss', 'content': 0.16418638825416565, 'timestamp': '2025-10-01 04:20:08.627075', 'step': 3236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:08.679576', 'step': 3236, 'epoch': 1} {'type': 'loss', 'content': 0.24323870241641998, 'timestamp': '2025-10-01 04:20:08.681842', 'step': 3237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:08.735604', 'step': 3237, 'epoch': 1} {'type': 'loss', 'content': 0.19332334399223328, 'timestamp': '2025-10-01 04:20:08.737796', 'step': 3238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:08.790203', 'step': 3238, 'epoch': 1} {'type': 'loss', 'content': 0.14661280810832977, 'timestamp': '2025-10-01 04:20:08.792445', 'step': 3239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:08.845565', 'step': 3239, 'epoch': 1} {'type': 'loss', 'content': 0.19473984837532043, 'timestamp': '2025-10-01 04:20:08.851457', 'step': 3240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:08.904132', 'step': 3240, 'epoch': 1} {'type': 'loss', 'content': 0.18155628442764282, 'timestamp': '2025-10-01 04:20:08.906443', 'step': 3241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:08.959334', 'step': 3241, 'epoch': 1} {'type': 'loss', 'content': 0.21439853310585022, 'timestamp': '2025-10-01 04:20:08.961702', 'step': 3242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:09.014388', 'step': 3242, 'epoch': 1} {'type': 'loss', 'content': 0.14497676491737366, 'timestamp': '2025-10-01 04:20:09.016688', 'step': 3243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:09.069581', 'step': 3243, 'epoch': 1} {'type': 'loss', 'content': 0.2547033727169037, 'timestamp': '2025-10-01 04:20:09.075596', 'step': 3244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:09.134348', 'step': 3244, 'epoch': 1} {'type': 'loss', 'content': 0.08962924033403397, 'timestamp': '2025-10-01 04:20:09.136647', 'step': 3245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:09.190729', 'step': 3245, 'epoch': 1} {'type': 'loss', 'content': 0.22244052588939667, 'timestamp': '2025-10-01 04:20:09.192917', 'step': 3246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:09.248628', 'step': 3246, 'epoch': 1} {'type': 'loss', 'content': 0.10071059316396713, 'timestamp': '2025-10-01 04:20:09.250906', 'step': 3247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:09.303427', 'step': 3247, 'epoch': 1} {'type': 'loss', 'content': 0.21510502696037292, 'timestamp': '2025-10-01 04:20:09.310074', 'step': 3248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:09.364933', 'step': 3248, 'epoch': 1} {'type': 'loss', 'content': 0.2446964830160141, 'timestamp': '2025-10-01 04:20:09.367362', 'step': 3249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:09.428560', 'step': 3249, 'epoch': 1} {'type': 'loss', 'content': 0.193747416138649, 'timestamp': '2025-10-01 04:20:09.431945', 'step': 3250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:09.484699', 'step': 3250, 'epoch': 1} {'type': 'loss', 'content': 0.18218563497066498, 'timestamp': '2025-10-01 04:20:09.487001', 'step': 3251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:09.541223', 'step': 3251, 'epoch': 1} {'type': 'loss', 'content': 0.15579146146774292, 'timestamp': '2025-10-01 04:20:09.547486', 'step': 3252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:09.599810', 'step': 3252, 'epoch': 1} {'type': 'loss', 'content': 0.14032432436943054, 'timestamp': '2025-10-01 04:20:09.602210', 'step': 3253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:09.659044', 'step': 3253, 'epoch': 1} {'type': 'loss', 'content': 0.09734635055065155, 'timestamp': '2025-10-01 04:20:09.661546', 'step': 3254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:09.715163', 'step': 3254, 'epoch': 1} {'type': 'loss', 'content': 0.13839563727378845, 'timestamp': '2025-10-01 04:20:09.719282', 'step': 3255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:09.772901', 'step': 3255, 'epoch': 1} {'type': 'loss', 'content': 0.08769332617521286, 'timestamp': '2025-10-01 04:20:09.778822', 'step': 3256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:09.832288', 'step': 3256, 'epoch': 1} {'type': 'loss', 'content': 0.17888589203357697, 'timestamp': '2025-10-01 04:20:09.834689', 'step': 3257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:09.887950', 'step': 3257, 'epoch': 1} {'type': 'loss', 'content': 0.2036423683166504, 'timestamp': '2025-10-01 04:20:09.890154', 'step': 3258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:09.954053', 'step': 3258, 'epoch': 1} {'type': 'loss', 'content': 0.2274925410747528, 'timestamp': '2025-10-01 04:20:09.956432', 'step': 3259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:10.009353', 'step': 3259, 'epoch': 1} {'type': 'loss', 'content': 0.13382916152477264, 'timestamp': '2025-10-01 04:20:10.015107', 'step': 3260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:10.067770', 'step': 3260, 'epoch': 1} {'type': 'loss', 'content': 0.18651844561100006, 'timestamp': '2025-10-01 04:20:10.070292', 'step': 3261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:10.123898', 'step': 3261, 'epoch': 1} {'type': 'loss', 'content': 0.11880405247211456, 'timestamp': '2025-10-01 04:20:10.126134', 'step': 3262, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-01 04:20:23.276065', 'step': 3262, 'epoch': 1} {'type': 'pplx', 'content': 11272.986894592834, 'timestamp': '2025-10-01 04:20:23.279288', 'step': 3262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:23.334423', 'step': 3262, 'epoch': 1} {'type': 'loss', 'content': 0.17686757445335388, 'timestamp': '2025-10-01 04:20:23.336641', 'step': 3263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:20:23.390818', 'step': 3263, 'epoch': 1} {'type': 'loss', 'content': 0.19322983920574188, 'timestamp': '2025-10-01 04:20:23.396945', 'step': 3264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:20:23.450927', 'step': 3264, 'epoch': 1} {'type': 'loss', 'content': 0.1321171373128891, 'timestamp': '2025-10-01 04:20:23.453099', 'step': 3265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:23.507178', 'step': 3265, 'epoch': 1} {'type': 'loss', 'content': 0.25486671924591064, 'timestamp': '2025-10-01 04:20:23.509506', 'step': 3266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:23.562931', 'step': 3266, 'epoch': 1} {'type': 'loss', 'content': 0.23642006516456604, 'timestamp': '2025-10-01 04:20:23.565039', 'step': 3267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:23.623394', 'step': 3267, 'epoch': 1} {'type': 'loss', 'content': 0.13215744495391846, 'timestamp': '2025-10-01 04:20:23.629328', 'step': 3268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:23.681859', 'step': 3268, 'epoch': 1} {'type': 'loss', 'content': 0.19441817700862885, 'timestamp': '2025-10-01 04:20:23.684050', 'step': 3269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:23.736801', 'step': 3269, 'epoch': 1} {'type': 'loss', 'content': 0.19710920751094818, 'timestamp': '2025-10-01 04:20:23.739028', 'step': 3270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:23.792083', 'step': 3270, 'epoch': 1} {'type': 'loss', 'content': 0.16698499023914337, 'timestamp': '2025-10-01 04:20:23.794446', 'step': 3271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:23.847562', 'step': 3271, 'epoch': 1} {'type': 'loss', 'content': 0.08243267983198166, 'timestamp': '2025-10-01 04:20:23.853692', 'step': 3272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:23.906178', 'step': 3272, 'epoch': 1} {'type': 'loss', 'content': 0.18897844851016998, 'timestamp': '2025-10-01 04:20:23.908333', 'step': 3273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:23.961497', 'step': 3273, 'epoch': 1} {'type': 'loss', 'content': 0.20113803446292877, 'timestamp': '2025-10-01 04:20:23.963560', 'step': 3274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:24.018002', 'step': 3274, 'epoch': 1} {'type': 'loss', 'content': 0.08643949776887894, 'timestamp': '2025-10-01 04:20:24.020221', 'step': 3275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:24.073665', 'step': 3275, 'epoch': 1} {'type': 'loss', 'content': 0.14839883148670197, 'timestamp': '2025-10-01 04:20:24.079285', 'step': 3276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:24.131849', 'step': 3276, 'epoch': 1} {'type': 'loss', 'content': 0.1656900942325592, 'timestamp': '2025-10-01 04:20:24.134203', 'step': 3277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:24.187623', 'step': 3277, 'epoch': 1} {'type': 'loss', 'content': 0.09808385372161865, 'timestamp': '2025-10-01 04:20:24.189606', 'step': 3278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:24.242363', 'step': 3278, 'epoch': 1} {'type': 'loss', 'content': 0.21461816132068634, 'timestamp': '2025-10-01 04:20:24.244646', 'step': 3279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:24.298529', 'step': 3279, 'epoch': 1} {'type': 'loss', 'content': 0.15383559465408325, 'timestamp': '2025-10-01 04:20:24.304295', 'step': 3280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:20:24.357437', 'step': 3280, 'epoch': 1} {'type': 'loss', 'content': 0.12185792624950409, 'timestamp': '2025-10-01 04:20:24.359897', 'step': 3281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:24.413748', 'step': 3281, 'epoch': 1} {'type': 'loss', 'content': 0.07373485714197159, 'timestamp': '2025-10-01 04:20:24.416132', 'step': 3282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:24.470776', 'step': 3282, 'epoch': 1} {'type': 'loss', 'content': 0.20627421140670776, 'timestamp': '2025-10-01 04:20:24.472794', 'step': 3283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:24.525501', 'step': 3283, 'epoch': 1} {'type': 'loss', 'content': 0.1773599535226822, 'timestamp': '2025-10-01 04:20:24.531295', 'step': 3284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:24.583848', 'step': 3284, 'epoch': 1} {'type': 'loss', 'content': 0.24323949217796326, 'timestamp': '2025-10-01 04:20:24.586459', 'step': 3285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:24.643399', 'step': 3285, 'epoch': 1} {'type': 'loss', 'content': 0.20351991057395935, 'timestamp': '2025-10-01 04:20:24.648068', 'step': 3286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:24.702396', 'step': 3286, 'epoch': 1} {'type': 'loss', 'content': 0.13817726075649261, 'timestamp': '2025-10-01 04:20:24.704672', 'step': 3287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:24.761008', 'step': 3287, 'epoch': 1} {'type': 'loss', 'content': 0.2179686576128006, 'timestamp': '2025-10-01 04:20:24.766931', 'step': 3288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:20:24.819467', 'step': 3288, 'epoch': 1} {'type': 'loss', 'content': 0.204792782664299, 'timestamp': '2025-10-01 04:20:24.821563', 'step': 3289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:24.874473', 'step': 3289, 'epoch': 1} {'type': 'loss', 'content': 0.2980845272541046, 'timestamp': '2025-10-01 04:20:24.876769', 'step': 3290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:24.930130', 'step': 3290, 'epoch': 1} {'type': 'loss', 'content': 0.177270770072937, 'timestamp': '2025-10-01 04:20:24.932389', 'step': 3291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:24.985499', 'step': 3291, 'epoch': 1} {'type': 'loss', 'content': 0.06210917606949806, 'timestamp': '2025-10-01 04:20:24.991577', 'step': 3292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:25.044414', 'step': 3292, 'epoch': 1} {'type': 'loss', 'content': 0.3015109896659851, 'timestamp': '2025-10-01 04:20:25.048891', 'step': 3293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:25.102780', 'step': 3293, 'epoch': 1} {'type': 'loss', 'content': 0.18875978887081146, 'timestamp': '2025-10-01 04:20:25.111568', 'step': 3294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:25.168606', 'step': 3294, 'epoch': 1} {'type': 'loss', 'content': 0.14104047417640686, 'timestamp': '2025-10-01 04:20:25.170890', 'step': 3295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:20:25.224767', 'step': 3295, 'epoch': 1} {'type': 'loss', 'content': 0.1421632617712021, 'timestamp': '2025-10-01 04:20:25.230674', 'step': 3296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:25.283390', 'step': 3296, 'epoch': 1} {'type': 'loss', 'content': 0.13018424808979034, 'timestamp': '2025-10-01 04:20:25.285480', 'step': 3297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:25.338938', 'step': 3297, 'epoch': 1} {'type': 'loss', 'content': 0.13011197745800018, 'timestamp': '2025-10-01 04:20:25.341138', 'step': 3298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:25.394718', 'step': 3298, 'epoch': 1} {'type': 'loss', 'content': 0.12934021651744843, 'timestamp': '2025-10-01 04:20:25.397679', 'step': 3299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:25.451091', 'step': 3299, 'epoch': 1} {'type': 'loss', 'content': 0.18094214797019958, 'timestamp': '2025-10-01 04:20:25.457093', 'step': 3300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:25.517269', 'step': 3300, 'epoch': 1} {'type': 'loss', 'content': 0.15159770846366882, 'timestamp': '2025-10-01 04:20:25.519450', 'step': 3301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:25.572668', 'step': 3301, 'epoch': 1} {'type': 'loss', 'content': 0.2572462856769562, 'timestamp': '2025-10-01 04:20:25.574940', 'step': 3302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:25.629343', 'step': 3302, 'epoch': 1} {'type': 'loss', 'content': 0.21626970171928406, 'timestamp': '2025-10-01 04:20:25.631657', 'step': 3303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:25.686018', 'step': 3303, 'epoch': 1} {'type': 'loss', 'content': 0.13970458507537842, 'timestamp': '2025-10-01 04:20:25.691806', 'step': 3304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:25.747620', 'step': 3304, 'epoch': 1} {'type': 'loss', 'content': 0.27671757340431213, 'timestamp': '2025-10-01 04:20:25.749975', 'step': 3305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:25.803203', 'step': 3305, 'epoch': 1} {'type': 'loss', 'content': 0.143229141831398, 'timestamp': '2025-10-01 04:20:25.805620', 'step': 3306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:25.859231', 'step': 3306, 'epoch': 1} {'type': 'loss', 'content': 0.17823514342308044, 'timestamp': '2025-10-01 04:20:25.861454', 'step': 3307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:25.921778', 'step': 3307, 'epoch': 1} {'type': 'loss', 'content': 0.1509312391281128, 'timestamp': '2025-10-01 04:20:25.927749', 'step': 3308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:25.981103', 'step': 3308, 'epoch': 1} {'type': 'loss', 'content': 0.16291017830371857, 'timestamp': '2025-10-01 04:20:25.985573', 'step': 3309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:26.038732', 'step': 3309, 'epoch': 1} {'type': 'loss', 'content': 0.17030194401741028, 'timestamp': '2025-10-01 04:20:26.040891', 'step': 3310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:26.094671', 'step': 3310, 'epoch': 1} {'type': 'loss', 'content': 0.23171700537204742, 'timestamp': '2025-10-01 04:20:26.098972', 'step': 3311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:26.155318', 'step': 3311, 'epoch': 1} {'type': 'loss', 'content': 0.1763228327035904, 'timestamp': '2025-10-01 04:20:26.162946', 'step': 3312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:26.215677', 'step': 3312, 'epoch': 1} {'type': 'loss', 'content': 0.14037060737609863, 'timestamp': '2025-10-01 04:20:26.217905', 'step': 3313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:26.274906', 'step': 3313, 'epoch': 1} {'type': 'loss', 'content': 0.1737954169511795, 'timestamp': '2025-10-01 04:20:26.277271', 'step': 3314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:26.332597', 'step': 3314, 'epoch': 1} {'type': 'loss', 'content': 0.14576520025730133, 'timestamp': '2025-10-01 04:20:26.341657', 'step': 3315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:26.398660', 'step': 3315, 'epoch': 1} {'type': 'loss', 'content': 0.13147251307964325, 'timestamp': '2025-10-01 04:20:26.411019', 'step': 3316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:26.470274', 'step': 3316, 'epoch': 1} {'type': 'loss', 'content': 0.12830433249473572, 'timestamp': '2025-10-01 04:20:26.473742', 'step': 3317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:26.527465', 'step': 3317, 'epoch': 1} {'type': 'loss', 'content': 0.15796619653701782, 'timestamp': '2025-10-01 04:20:26.531720', 'step': 3318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:26.592496', 'step': 3318, 'epoch': 1} {'type': 'loss', 'content': 0.18758003413677216, 'timestamp': '2025-10-01 04:20:26.594593', 'step': 3319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:26.656393', 'step': 3319, 'epoch': 1} {'type': 'loss', 'content': 0.138631209731102, 'timestamp': '2025-10-01 04:20:26.662320', 'step': 3320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:26.714744', 'step': 3320, 'epoch': 1} {'type': 'loss', 'content': 0.2160043716430664, 'timestamp': '2025-10-01 04:20:26.716949', 'step': 3321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:26.770339', 'step': 3321, 'epoch': 1} {'type': 'loss', 'content': 0.11316618323326111, 'timestamp': '2025-10-01 04:20:26.777586', 'step': 3322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:20:26.856447', 'step': 3322, 'epoch': 1} {'type': 'loss', 'content': 0.10619719326496124, 'timestamp': '2025-10-01 04:20:26.862096', 'step': 3323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:26.915622', 'step': 3323, 'epoch': 1} {'type': 'loss', 'content': 0.14724460244178772, 'timestamp': '2025-10-01 04:20:26.921551', 'step': 3324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:26.976612', 'step': 3324, 'epoch': 1} {'type': 'loss', 'content': 0.31316912174224854, 'timestamp': '2025-10-01 04:20:26.978880', 'step': 3325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:27.032064', 'step': 3325, 'epoch': 1} {'type': 'loss', 'content': 0.18045924603939056, 'timestamp': '2025-10-01 04:20:27.036251', 'step': 3326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:27.093021', 'step': 3326, 'epoch': 1} {'type': 'loss', 'content': 0.2415693998336792, 'timestamp': '2025-10-01 04:20:27.095971', 'step': 3327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:20:27.158999', 'step': 3327, 'epoch': 1} {'type': 'loss', 'content': 0.1956959217786789, 'timestamp': '2025-10-01 04:20:27.169675', 'step': 3328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:27.222898', 'step': 3328, 'epoch': 1} {'type': 'loss', 'content': 0.2159428894519806, 'timestamp': '2025-10-01 04:20:27.225177', 'step': 3329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:27.283598', 'step': 3329, 'epoch': 1} {'type': 'loss', 'content': 0.1463661938905716, 'timestamp': '2025-10-01 04:20:27.285687', 'step': 3330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:27.340708', 'step': 3330, 'epoch': 1} {'type': 'loss', 'content': 0.10018350929021835, 'timestamp': '2025-10-01 04:20:27.343040', 'step': 3331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:27.396263', 'step': 3331, 'epoch': 1} {'type': 'loss', 'content': 0.19445458054542542, 'timestamp': '2025-10-01 04:20:27.402049', 'step': 3332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:27.454753', 'step': 3332, 'epoch': 1} {'type': 'loss', 'content': 0.1188787966966629, 'timestamp': '2025-10-01 04:20:27.457052', 'step': 3333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:27.510334', 'step': 3333, 'epoch': 1} {'type': 'loss', 'content': 0.21489544212818146, 'timestamp': '2025-10-01 04:20:27.512463', 'step': 3334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:20:27.566179', 'step': 3334, 'epoch': 1} {'type': 'loss', 'content': 0.196089968085289, 'timestamp': '2025-10-01 04:20:27.568420', 'step': 3335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:27.622186', 'step': 3335, 'epoch': 1} {'type': 'loss', 'content': 0.17890533804893494, 'timestamp': '2025-10-01 04:20:27.628810', 'step': 3336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:27.683643', 'step': 3336, 'epoch': 1} {'type': 'loss', 'content': 0.11120717972517014, 'timestamp': '2025-10-01 04:20:27.686417', 'step': 3337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:27.741767', 'step': 3337, 'epoch': 1} {'type': 'loss', 'content': 0.15306812524795532, 'timestamp': '2025-10-01 04:20:27.744232', 'step': 3338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:27.799415', 'step': 3338, 'epoch': 1} {'type': 'loss', 'content': 0.275709331035614, 'timestamp': '2025-10-01 04:20:27.802361', 'step': 3339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-01 04:20:27.873949', 'step': 3339, 'epoch': 1} {'type': 'loss', 'content': 0.21669192612171173, 'timestamp': '2025-10-01 04:20:27.887144', 'step': 3340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:27.945158', 'step': 3340, 'epoch': 1} {'type': 'loss', 'content': 0.22900187969207764, 'timestamp': '2025-10-01 04:20:27.947942', 'step': 3341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:28.002539', 'step': 3341, 'epoch': 1} {'type': 'loss', 'content': 0.2640644609928131, 'timestamp': '2025-10-01 04:20:28.005157', 'step': 3342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:28.059865', 'step': 3342, 'epoch': 1} {'type': 'loss', 'content': 0.17861954867839813, 'timestamp': '2025-10-01 04:20:28.062871', 'step': 3343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:28.118489', 'step': 3343, 'epoch': 1} {'type': 'loss', 'content': 0.22519341111183167, 'timestamp': '2025-10-01 04:20:28.124868', 'step': 3344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:28.179123', 'step': 3344, 'epoch': 1} {'type': 'loss', 'content': 0.17125436663627625, 'timestamp': '2025-10-01 04:20:28.181998', 'step': 3345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:28.236792', 'step': 3345, 'epoch': 1} {'type': 'loss', 'content': 0.18447233736515045, 'timestamp': '2025-10-01 04:20:28.239220', 'step': 3346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:28.294300', 'step': 3346, 'epoch': 1} {'type': 'loss', 'content': 0.1899285465478897, 'timestamp': '2025-10-01 04:20:28.297026', 'step': 3347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:28.351710', 'step': 3347, 'epoch': 1} {'type': 'loss', 'content': 0.08634965866804123, 'timestamp': '2025-10-01 04:20:28.357992', 'step': 3348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:28.412350', 'step': 3348, 'epoch': 1} {'type': 'loss', 'content': 0.298201322555542, 'timestamp': '2025-10-01 04:20:28.415503', 'step': 3349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:28.471379', 'step': 3349, 'epoch': 1} {'type': 'loss', 'content': 0.1978805959224701, 'timestamp': '2025-10-01 04:20:28.473620', 'step': 3350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:28.528175', 'step': 3350, 'epoch': 1} {'type': 'loss', 'content': 0.12316220253705978, 'timestamp': '2025-10-01 04:20:28.530883', 'step': 3351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:28.585576', 'step': 3351, 'epoch': 1} {'type': 'loss', 'content': 0.16132183372974396, 'timestamp': '2025-10-01 04:20:28.591927', 'step': 3352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:20:28.645789', 'step': 3352, 'epoch': 1} {'type': 'loss', 'content': 0.14041058719158173, 'timestamp': '2025-10-01 04:20:28.648469', 'step': 3353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:28.703426', 'step': 3353, 'epoch': 1} {'type': 'loss', 'content': 0.13684670627117157, 'timestamp': '2025-10-01 04:20:28.711041', 'step': 3354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:28.766996', 'step': 3354, 'epoch': 1} {'type': 'loss', 'content': 0.24438101053237915, 'timestamp': '2025-10-01 04:20:28.769529', 'step': 3355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:28.829772', 'step': 3355, 'epoch': 1} {'type': 'loss', 'content': 0.194280743598938, 'timestamp': '2025-10-01 04:20:28.838944', 'step': 3356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:28.894971', 'step': 3356, 'epoch': 1} {'type': 'loss', 'content': 0.1389857679605484, 'timestamp': '2025-10-01 04:20:28.897573', 'step': 3357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:28.951465', 'step': 3357, 'epoch': 1} {'type': 'loss', 'content': 0.14545951783657074, 'timestamp': '2025-10-01 04:20:28.954334', 'step': 3358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:29.008610', 'step': 3358, 'epoch': 1} {'type': 'loss', 'content': 0.0969371497631073, 'timestamp': '2025-10-01 04:20:29.011215', 'step': 3359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:29.068246', 'step': 3359, 'epoch': 1} {'type': 'loss', 'content': 0.1987854540348053, 'timestamp': '2025-10-01 04:20:29.074499', 'step': 3360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:29.128595', 'step': 3360, 'epoch': 1} {'type': 'loss', 'content': 0.16822507977485657, 'timestamp': '2025-10-01 04:20:29.133110', 'step': 3361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:29.187718', 'step': 3361, 'epoch': 1} {'type': 'loss', 'content': 0.1812136322259903, 'timestamp': '2025-10-01 04:20:29.189941', 'step': 3362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:29.244219', 'step': 3362, 'epoch': 1} {'type': 'loss', 'content': 0.12035146355628967, 'timestamp': '2025-10-01 04:20:29.246479', 'step': 3363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:29.300723', 'step': 3363, 'epoch': 1} {'type': 'loss', 'content': 0.17288738489151, 'timestamp': '2025-10-01 04:20:29.306905', 'step': 3364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:29.360908', 'step': 3364, 'epoch': 1} {'type': 'loss', 'content': 0.2633468210697174, 'timestamp': '2025-10-01 04:20:29.363135', 'step': 3365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:29.417983', 'step': 3365, 'epoch': 1} {'type': 'loss', 'content': 0.11285717040300369, 'timestamp': '2025-10-01 04:20:29.421360', 'step': 3366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:29.476271', 'step': 3366, 'epoch': 1} {'type': 'loss', 'content': 0.1917886883020401, 'timestamp': '2025-10-01 04:20:29.481519', 'step': 3367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:20:29.536965', 'step': 3367, 'epoch': 1} {'type': 'loss', 'content': 0.16873498260974884, 'timestamp': '2025-10-01 04:20:29.543164', 'step': 3368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:29.596215', 'step': 3368, 'epoch': 1} {'type': 'loss', 'content': 0.2315938025712967, 'timestamp': '2025-10-01 04:20:29.600770', 'step': 3369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:29.654625', 'step': 3369, 'epoch': 1} {'type': 'loss', 'content': 0.12490813434123993, 'timestamp': '2025-10-01 04:20:29.656799', 'step': 3370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:29.710464', 'step': 3370, 'epoch': 1} {'type': 'loss', 'content': 0.23914726078510284, 'timestamp': '2025-10-01 04:20:29.712662', 'step': 3371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:29.765866', 'step': 3371, 'epoch': 1} {'type': 'loss', 'content': 0.14961981773376465, 'timestamp': '2025-10-01 04:20:29.771948', 'step': 3372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:29.824666', 'step': 3372, 'epoch': 1} {'type': 'loss', 'content': 0.14557123184204102, 'timestamp': '2025-10-01 04:20:29.827034', 'step': 3373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:20:29.880246', 'step': 3373, 'epoch': 1} {'type': 'loss', 'content': 0.16100259125232697, 'timestamp': '2025-10-01 04:20:29.882388', 'step': 3374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:29.935624', 'step': 3374, 'epoch': 1} {'type': 'loss', 'content': 0.21754689514636993, 'timestamp': '2025-10-01 04:20:29.940270', 'step': 3375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:29.996542', 'step': 3375, 'epoch': 1} {'type': 'loss', 'content': 0.2538747489452362, 'timestamp': '2025-10-01 04:20:30.003127', 'step': 3376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:30.056346', 'step': 3376, 'epoch': 1} {'type': 'loss', 'content': 0.24990637600421906, 'timestamp': '2025-10-01 04:20:30.059219', 'step': 3377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:30.116150', 'step': 3377, 'epoch': 1} {'type': 'loss', 'content': 0.171352356672287, 'timestamp': '2025-10-01 04:20:30.118357', 'step': 3378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:30.173153', 'step': 3378, 'epoch': 1} {'type': 'loss', 'content': 0.14503981173038483, 'timestamp': '2025-10-01 04:20:30.175858', 'step': 3379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:30.229168', 'step': 3379, 'epoch': 1} {'type': 'loss', 'content': 0.18989138305187225, 'timestamp': '2025-10-01 04:20:30.235710', 'step': 3380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:30.288444', 'step': 3380, 'epoch': 1} {'type': 'loss', 'content': 0.1811559647321701, 'timestamp': '2025-10-01 04:20:30.290675', 'step': 3381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:30.343759', 'step': 3381, 'epoch': 1} {'type': 'loss', 'content': 0.22459807991981506, 'timestamp': '2025-10-01 04:20:30.345968', 'step': 3382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:30.400426', 'step': 3382, 'epoch': 1} {'type': 'loss', 'content': 0.19466125965118408, 'timestamp': '2025-10-01 04:20:30.402843', 'step': 3383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:30.455946', 'step': 3383, 'epoch': 1} {'type': 'loss', 'content': 0.15861546993255615, 'timestamp': '2025-10-01 04:20:30.462152', 'step': 3384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:30.517871', 'step': 3384, 'epoch': 1} {'type': 'loss', 'content': 0.20235688984394073, 'timestamp': '2025-10-01 04:20:30.520494', 'step': 3385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:30.577464', 'step': 3385, 'epoch': 1} {'type': 'loss', 'content': 0.1963260918855667, 'timestamp': '2025-10-01 04:20:30.579828', 'step': 3386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:30.636884', 'step': 3386, 'epoch': 1} {'type': 'loss', 'content': 0.1283227801322937, 'timestamp': '2025-10-01 04:20:30.639277', 'step': 3387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:30.695071', 'step': 3387, 'epoch': 1} {'type': 'loss', 'content': 0.10496900230646133, 'timestamp': '2025-10-01 04:20:30.701500', 'step': 3388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:30.757126', 'step': 3388, 'epoch': 1} {'type': 'loss', 'content': 0.13859765231609344, 'timestamp': '2025-10-01 04:20:30.759407', 'step': 3389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:30.815714', 'step': 3389, 'epoch': 1} {'type': 'loss', 'content': 0.23135147988796234, 'timestamp': '2025-10-01 04:20:30.817959', 'step': 3390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:30.874563', 'step': 3390, 'epoch': 1} {'type': 'loss', 'content': 0.16635563969612122, 'timestamp': '2025-10-01 04:20:30.876662', 'step': 3391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:30.932715', 'step': 3391, 'epoch': 1} {'type': 'loss', 'content': 0.1969951093196869, 'timestamp': '2025-10-01 04:20:30.939331', 'step': 3392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:30.996288', 'step': 3392, 'epoch': 1} {'type': 'loss', 'content': 0.11999289691448212, 'timestamp': '2025-10-01 04:20:30.998456', 'step': 3393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:31.052712', 'step': 3393, 'epoch': 1} {'type': 'loss', 'content': 0.1267460584640503, 'timestamp': '2025-10-01 04:20:31.054672', 'step': 3394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:31.110137', 'step': 3394, 'epoch': 1} {'type': 'loss', 'content': 0.11851520836353302, 'timestamp': '2025-10-01 04:20:31.112394', 'step': 3395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:31.169952', 'step': 3395, 'epoch': 1} {'type': 'loss', 'content': 0.20289072394371033, 'timestamp': '2025-10-01 04:20:31.176898', 'step': 3396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:31.231788', 'step': 3396, 'epoch': 1} {'type': 'loss', 'content': 0.17777346074581146, 'timestamp': '2025-10-01 04:20:31.234070', 'step': 3397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:31.287853', 'step': 3397, 'epoch': 1} {'type': 'loss', 'content': 0.19963207840919495, 'timestamp': '2025-10-01 04:20:31.290163', 'step': 3398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:31.352227', 'step': 3398, 'epoch': 1} {'type': 'loss', 'content': 0.20498324930667877, 'timestamp': '2025-10-01 04:20:31.354318', 'step': 3399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:31.407252', 'step': 3399, 'epoch': 1} {'type': 'loss', 'content': 0.15246964991092682, 'timestamp': '2025-10-01 04:20:31.413456', 'step': 3400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:31.466146', 'step': 3400, 'epoch': 1} {'type': 'loss', 'content': 0.1537800431251526, 'timestamp': '2025-10-01 04:20:31.468421', 'step': 3401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:31.521321', 'step': 3401, 'epoch': 1} {'type': 'loss', 'content': 0.1264595091342926, 'timestamp': '2025-10-01 04:20:31.523698', 'step': 3402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:31.576121', 'step': 3402, 'epoch': 1} {'type': 'loss', 'content': 0.2774944603443146, 'timestamp': '2025-10-01 04:20:31.578354', 'step': 3403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:31.631230', 'step': 3403, 'epoch': 1} {'type': 'loss', 'content': 0.2002183049917221, 'timestamp': '2025-10-01 04:20:31.637039', 'step': 3404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:31.689454', 'step': 3404, 'epoch': 1} {'type': 'loss', 'content': 0.18611285090446472, 'timestamp': '2025-10-01 04:20:31.691781', 'step': 3405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:31.745103', 'step': 3405, 'epoch': 1} {'type': 'loss', 'content': 0.13340900838375092, 'timestamp': '2025-10-01 04:20:31.747323', 'step': 3406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:20:31.801135', 'step': 3406, 'epoch': 1} {'type': 'loss', 'content': 0.19193989038467407, 'timestamp': '2025-10-01 04:20:31.803398', 'step': 3407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:31.857537', 'step': 3407, 'epoch': 1} {'type': 'loss', 'content': 0.22317831218242645, 'timestamp': '2025-10-01 04:20:31.863788', 'step': 3408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:31.916805', 'step': 3408, 'epoch': 1} {'type': 'loss', 'content': 0.1123502105474472, 'timestamp': '2025-10-01 04:20:31.919120', 'step': 3409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:31.973497', 'step': 3409, 'epoch': 1} {'type': 'loss', 'content': 0.15253107249736786, 'timestamp': '2025-10-01 04:20:31.976171', 'step': 3410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:32.029570', 'step': 3410, 'epoch': 1} {'type': 'loss', 'content': 0.06300092488527298, 'timestamp': '2025-10-01 04:20:32.031978', 'step': 3411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:32.084550', 'step': 3411, 'epoch': 1} {'type': 'loss', 'content': 0.2697073817253113, 'timestamp': '2025-10-01 04:20:32.090605', 'step': 3412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:32.143210', 'step': 3412, 'epoch': 1} {'type': 'loss', 'content': 0.18896125257015228, 'timestamp': '2025-10-01 04:20:32.151396', 'step': 3413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:32.209397', 'step': 3413, 'epoch': 1} {'type': 'loss', 'content': 0.14892259240150452, 'timestamp': '2025-10-01 04:20:32.211457', 'step': 3414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:32.264732', 'step': 3414, 'epoch': 1} {'type': 'loss', 'content': 0.12794062495231628, 'timestamp': '2025-10-01 04:20:32.267194', 'step': 3415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:32.321097', 'step': 3415, 'epoch': 1} {'type': 'loss', 'content': 0.17687413096427917, 'timestamp': '2025-10-01 04:20:32.327116', 'step': 3416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:32.379426', 'step': 3416, 'epoch': 1} {'type': 'loss', 'content': 0.14778462052345276, 'timestamp': '2025-10-01 04:20:32.381624', 'step': 3417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:32.435105', 'step': 3417, 'epoch': 1} {'type': 'loss', 'content': 0.13637234270572662, 'timestamp': '2025-10-01 04:20:32.437324', 'step': 3418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:32.490961', 'step': 3418, 'epoch': 1} {'type': 'loss', 'content': 0.1405860185623169, 'timestamp': '2025-10-01 04:20:32.493778', 'step': 3419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:32.546712', 'step': 3419, 'epoch': 1} {'type': 'loss', 'content': 0.09682827442884445, 'timestamp': '2025-10-01 04:20:32.552347', 'step': 3420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:32.604889', 'step': 3420, 'epoch': 1} {'type': 'loss', 'content': 0.22374680638313293, 'timestamp': '2025-10-01 04:20:32.607058', 'step': 3421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:32.660092', 'step': 3421, 'epoch': 1} {'type': 'loss', 'content': 0.13898614048957825, 'timestamp': '2025-10-01 04:20:32.662677', 'step': 3422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:20:32.715574', 'step': 3422, 'epoch': 1} {'type': 'loss', 'content': 0.16621388494968414, 'timestamp': '2025-10-01 04:20:32.717658', 'step': 3423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:32.771581', 'step': 3423, 'epoch': 1} {'type': 'loss', 'content': 0.09901638329029083, 'timestamp': '2025-10-01 04:20:32.777437', 'step': 3424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:32.830166', 'step': 3424, 'epoch': 1} {'type': 'loss', 'content': 0.12058484554290771, 'timestamp': '2025-10-01 04:20:32.832385', 'step': 3425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:32.885166', 'step': 3425, 'epoch': 1} {'type': 'loss', 'content': 0.15382099151611328, 'timestamp': '2025-10-01 04:20:32.887426', 'step': 3426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:32.940593', 'step': 3426, 'epoch': 1} {'type': 'loss', 'content': 0.25960370898246765, 'timestamp': '2025-10-01 04:20:32.943064', 'step': 3427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:32.995916', 'step': 3427, 'epoch': 1} {'type': 'loss', 'content': 0.2739746570587158, 'timestamp': '2025-10-01 04:20:33.001763', 'step': 3428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:33.053730', 'step': 3428, 'epoch': 1} {'type': 'loss', 'content': 0.2073371708393097, 'timestamp': '2025-10-01 04:20:33.057082', 'step': 3429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:33.110273', 'step': 3429, 'epoch': 1} {'type': 'loss', 'content': 0.2570692002773285, 'timestamp': '2025-10-01 04:20:33.113996', 'step': 3430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:33.168109', 'step': 3430, 'epoch': 1} {'type': 'loss', 'content': 0.16063031554222107, 'timestamp': '2025-10-01 04:20:33.170208', 'step': 3431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:33.222776', 'step': 3431, 'epoch': 1} {'type': 'loss', 'content': 0.1784837692975998, 'timestamp': '2025-10-01 04:20:33.231826', 'step': 3432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:33.285182', 'step': 3432, 'epoch': 1} {'type': 'loss', 'content': 0.09622987359762192, 'timestamp': '2025-10-01 04:20:33.287416', 'step': 3433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:33.339893', 'step': 3433, 'epoch': 1} {'type': 'loss', 'content': 0.18829572200775146, 'timestamp': '2025-10-01 04:20:33.342174', 'step': 3434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:33.395265', 'step': 3434, 'epoch': 1} {'type': 'loss', 'content': 0.18517225980758667, 'timestamp': '2025-10-01 04:20:33.397505', 'step': 3435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:33.450482', 'step': 3435, 'epoch': 1} {'type': 'loss', 'content': 0.2764083445072174, 'timestamp': '2025-10-01 04:20:33.456186', 'step': 3436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:20:33.508404', 'step': 3436, 'epoch': 1} {'type': 'loss', 'content': 0.12692496180534363, 'timestamp': '2025-10-01 04:20:33.510477', 'step': 3437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:33.564174', 'step': 3437, 'epoch': 1} {'type': 'loss', 'content': 0.25059664249420166, 'timestamp': '2025-10-01 04:20:33.566338', 'step': 3438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:33.619210', 'step': 3438, 'epoch': 1} {'type': 'loss', 'content': 0.12623825669288635, 'timestamp': '2025-10-01 04:20:33.621405', 'step': 3439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:33.675959', 'step': 3439, 'epoch': 1} {'type': 'loss', 'content': 0.18767032027244568, 'timestamp': '2025-10-01 04:20:33.681865', 'step': 3440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:33.735280', 'step': 3440, 'epoch': 1} {'type': 'loss', 'content': 0.16697227954864502, 'timestamp': '2025-10-01 04:20:33.737704', 'step': 3441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:33.790747', 'step': 3441, 'epoch': 1} {'type': 'loss', 'content': 0.19124925136566162, 'timestamp': '2025-10-01 04:20:33.793308', 'step': 3442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:33.846710', 'step': 3442, 'epoch': 1} {'type': 'loss', 'content': 0.16720522940158844, 'timestamp': '2025-10-01 04:20:33.849019', 'step': 3443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:20:33.902011', 'step': 3443, 'epoch': 1} {'type': 'loss', 'content': 0.12746797502040863, 'timestamp': '2025-10-01 04:20:33.908010', 'step': 3444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:33.961221', 'step': 3444, 'epoch': 1} {'type': 'loss', 'content': 0.20554007589817047, 'timestamp': '2025-10-01 04:20:33.963806', 'step': 3445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:34.016883', 'step': 3445, 'epoch': 1} {'type': 'loss', 'content': 0.16588923335075378, 'timestamp': '2025-10-01 04:20:34.019586', 'step': 3446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:34.072738', 'step': 3446, 'epoch': 1} {'type': 'loss', 'content': 0.1667173057794571, 'timestamp': '2025-10-01 04:20:34.075197', 'step': 3447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:34.129000', 'step': 3447, 'epoch': 1} {'type': 'loss', 'content': 0.2173883616924286, 'timestamp': '2025-10-01 04:20:34.134715', 'step': 3448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:34.187705', 'step': 3448, 'epoch': 1} {'type': 'loss', 'content': 0.13562792539596558, 'timestamp': '2025-10-01 04:20:34.189894', 'step': 3449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:34.243407', 'step': 3449, 'epoch': 1} {'type': 'loss', 'content': 0.1645406186580658, 'timestamp': '2025-10-01 04:20:34.245567', 'step': 3450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:34.298865', 'step': 3450, 'epoch': 1} {'type': 'loss', 'content': 0.1950538456439972, 'timestamp': '2025-10-01 04:20:34.301193', 'step': 3451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:34.354389', 'step': 3451, 'epoch': 1} {'type': 'loss', 'content': 0.20135393738746643, 'timestamp': '2025-10-01 04:20:34.360208', 'step': 3452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:34.413792', 'step': 3452, 'epoch': 1} {'type': 'loss', 'content': 0.19658343493938446, 'timestamp': '2025-10-01 04:20:34.415784', 'step': 3453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:34.468543', 'step': 3453, 'epoch': 1} {'type': 'loss', 'content': 0.18828903138637543, 'timestamp': '2025-10-01 04:20:34.470762', 'step': 3454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:34.524262', 'step': 3454, 'epoch': 1} {'type': 'loss', 'content': 0.13453738391399384, 'timestamp': '2025-10-01 04:20:34.526367', 'step': 3455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:34.579128', 'step': 3455, 'epoch': 1} {'type': 'loss', 'content': 0.20132587850093842, 'timestamp': '2025-10-01 04:20:34.584607', 'step': 3456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:34.637089', 'step': 3456, 'epoch': 1} {'type': 'loss', 'content': 0.07902407646179199, 'timestamp': '2025-10-01 04:20:34.639125', 'step': 3457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:34.691994', 'step': 3457, 'epoch': 1} {'type': 'loss', 'content': 0.14725333452224731, 'timestamp': '2025-10-01 04:20:34.694446', 'step': 3458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:34.749601', 'step': 3458, 'epoch': 1} {'type': 'loss', 'content': 0.2045953869819641, 'timestamp': '2025-10-01 04:20:34.753453', 'step': 3459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:34.807976', 'step': 3459, 'epoch': 1} {'type': 'loss', 'content': 0.1396828591823578, 'timestamp': '2025-10-01 04:20:34.814226', 'step': 3460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:34.867116', 'step': 3460, 'epoch': 1} {'type': 'loss', 'content': 0.13816383481025696, 'timestamp': '2025-10-01 04:20:34.869379', 'step': 3461, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:34.925668', 'step': 3461, 'epoch': 1} {'type': 'loss', 'content': 0.10072645545005798, 'timestamp': '2025-10-01 04:20:34.927965', 'step': 3462, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:34.980706', 'step': 3462, 'epoch': 1} {'type': 'loss', 'content': 0.14585503935813904, 'timestamp': '2025-10-01 04:20:34.982883', 'step': 3463, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:35.036090', 'step': 3463, 'epoch': 1} {'type': 'loss', 'content': 0.1551290601491928, 'timestamp': '2025-10-01 04:20:35.041947', 'step': 3464, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:35.094331', 'step': 3464, 'epoch': 1} {'type': 'loss', 'content': 0.17285053431987762, 'timestamp': '2025-10-01 04:20:35.096549', 'step': 3465, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:35.148939', 'step': 3465, 'epoch': 1} {'type': 'loss', 'content': 0.10688948631286621, 'timestamp': '2025-10-01 04:20:35.151177', 'step': 3466, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:35.204157', 'step': 3466, 'epoch': 1} {'type': 'loss', 'content': 0.20046472549438477, 'timestamp': '2025-10-01 04:20:35.206227', 'step': 3467, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:35.258686', 'step': 3467, 'epoch': 1} {'type': 'loss', 'content': 0.18050426244735718, 'timestamp': '2025-10-01 04:20:35.264914', 'step': 3468, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:35.319875', 'step': 3468, 'epoch': 1} {'type': 'loss', 'content': 0.1473170965909958, 'timestamp': '2025-10-01 04:20:35.322602', 'step': 3469, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:35.376149', 'step': 3469, 'epoch': 1} {'type': 'loss', 'content': 0.17880836129188538, 'timestamp': '2025-10-01 04:20:35.378669', 'step': 3470, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:35.432427', 'step': 3470, 'epoch': 1} {'type': 'loss', 'content': 0.2288568913936615, 'timestamp': '2025-10-01 04:20:35.434886', 'step': 3471, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:35.489460', 'step': 3471, 'epoch': 1} {'type': 'loss', 'content': 0.16149161756038666, 'timestamp': '2025-10-01 04:20:35.496308', 'step': 3472, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:35.549821', 'step': 3472, 'epoch': 1} {'type': 'loss', 'content': 0.15769408643245697, 'timestamp': '2025-10-01 04:20:35.553001', 'step': 3473, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:35.607409', 'step': 3473, 'epoch': 1} {'type': 'loss', 'content': 0.18668778240680695, 'timestamp': '2025-10-01 04:20:35.610069', 'step': 3474, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:35.664398', 'step': 3474, 'epoch': 1} {'type': 'loss', 'content': 0.1536266952753067, 'timestamp': '2025-10-01 04:20:35.668201', 'step': 3475, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:35.722014', 'step': 3475, 'epoch': 1} {'type': 'loss', 'content': 0.2667812407016754, 'timestamp': '2025-10-01 04:20:35.728363', 'step': 3476, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:35.781754', 'step': 3476, 'epoch': 1} {'type': 'loss', 'content': 0.14086812734603882, 'timestamp': '2025-10-01 04:20:35.783952', 'step': 3477, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:35.837388', 'step': 3477, 'epoch': 1} {'type': 'loss', 'content': 0.14744219183921814, 'timestamp': '2025-10-01 04:20:35.839663', 'step': 3478, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:35.893922', 'step': 3478, 'epoch': 1} {'type': 'loss', 'content': 0.15022359788417816, 'timestamp': '2025-10-01 04:20:35.896125', 'step': 3479, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:35.950671', 'step': 3479, 'epoch': 1} {'type': 'loss', 'content': 0.19317762553691864, 'timestamp': '2025-10-01 04:20:35.957091', 'step': 3480, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:36.015244', 'step': 3480, 'epoch': 1} {'type': 'loss', 'content': 0.1548435539007187, 'timestamp': '2025-10-01 04:20:36.017680', 'step': 3481, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:36.070394', 'step': 3481, 'epoch': 1} {'type': 'loss', 'content': 0.23039484024047852, 'timestamp': '2025-10-01 04:20:36.072589', 'step': 3482, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:36.126965', 'step': 3482, 'epoch': 1} {'type': 'loss', 'content': 0.2188841849565506, 'timestamp': '2025-10-01 04:20:36.129106', 'step': 3483, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:36.182228', 'step': 3483, 'epoch': 1} {'type': 'loss', 'content': 0.18254442512989044, 'timestamp': '2025-10-01 04:20:36.188106', 'step': 3484, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:36.241927', 'step': 3484, 'epoch': 1} {'type': 'loss', 'content': 0.14815853536128998, 'timestamp': '2025-10-01 04:20:36.244448', 'step': 3485, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:36.298222', 'step': 3485, 'epoch': 1} {'type': 'loss', 'content': 0.12970909476280212, 'timestamp': '2025-10-01 04:20:36.300819', 'step': 3486, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:36.354478', 'step': 3486, 'epoch': 1} {'type': 'loss', 'content': 0.18420343101024628, 'timestamp': '2025-10-01 04:20:36.356897', 'step': 3487, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:36.410194', 'step': 3487, 'epoch': 1} {'type': 'loss', 'content': 0.1309419721364975, 'timestamp': '2025-10-01 04:20:36.416489', 'step': 3488, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:36.470395', 'step': 3488, 'epoch': 1} {'type': 'loss', 'content': 0.21015508472919464, 'timestamp': '2025-10-01 04:20:36.473004', 'step': 3489, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:36.526762', 'step': 3489, 'epoch': 1} {'type': 'loss', 'content': 0.18017637729644775, 'timestamp': '2025-10-01 04:20:36.529541', 'step': 3490, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:36.583802', 'step': 3490, 'epoch': 1} {'type': 'loss', 'content': 0.22892451286315918, 'timestamp': '2025-10-01 04:20:36.585955', 'step': 3491, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:36.638994', 'step': 3491, 'epoch': 1} {'type': 'loss', 'content': 0.14952817559242249, 'timestamp': '2025-10-01 04:20:36.644520', 'step': 3492, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:36.699547', 'step': 3492, 'epoch': 1} {'type': 'loss', 'content': 0.08039393275976181, 'timestamp': '2025-10-01 04:20:36.701426', 'step': 3493, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:36.755600', 'step': 3493, 'epoch': 1} {'type': 'loss', 'content': 0.23039968311786652, 'timestamp': '2025-10-01 04:20:36.757667', 'step': 3494, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:36.810791', 'step': 3494, 'epoch': 1} {'type': 'loss', 'content': 0.13036008179187775, 'timestamp': '2025-10-01 04:20:36.813088', 'step': 3495, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:36.866224', 'step': 3495, 'epoch': 1} {'type': 'loss', 'content': 0.3442614674568176, 'timestamp': '2025-10-01 04:20:36.872043', 'step': 3496, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:20:36.926275', 'step': 3496, 'epoch': 1} {'type': 'loss', 'content': 0.10678528249263763, 'timestamp': '2025-10-01 04:20:36.928470', 'step': 3497, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:36.981753', 'step': 3497, 'epoch': 1} {'type': 'loss', 'content': 0.19099950790405273, 'timestamp': '2025-10-01 04:20:36.984245', 'step': 3498, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:37.038286', 'step': 3498, 'epoch': 1} {'type': 'loss', 'content': 0.12940898537635803, 'timestamp': '2025-10-01 04:20:37.040583', 'step': 3499, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:37.094177', 'step': 3499, 'epoch': 1} {'type': 'loss', 'content': 0.13752618432044983, 'timestamp': '2025-10-01 04:20:37.104655', 'step': 3500, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 3500', 'timestamp': '2025-10-01 04:20:37.471092', 'step': 3500, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:37.527853', 'step': 3500, 'epoch': 1} {'type': 'loss', 'content': 0.20889809727668762, 'timestamp': '2025-10-01 04:20:37.530004', 'step': 3501, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:37.586067', 'step': 3501, 'epoch': 1} {'type': 'loss', 'content': 0.12060178071260452, 'timestamp': '2025-10-01 04:20:37.588214', 'step': 3502, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:37.642720', 'step': 3502, 'epoch': 1} {'type': 'loss', 'content': 0.1120423749089241, 'timestamp': '2025-10-01 04:20:37.645994', 'step': 3503, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:37.702570', 'step': 3503, 'epoch': 1} {'type': 'loss', 'content': 0.18689481914043427, 'timestamp': '2025-10-01 04:20:37.708676', 'step': 3504, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:37.761218', 'step': 3504, 'epoch': 1} {'type': 'loss', 'content': 0.14912162721157074, 'timestamp': '2025-10-01 04:20:37.763482', 'step': 3505, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:37.817247', 'step': 3505, 'epoch': 1} {'type': 'loss', 'content': 0.14169782400131226, 'timestamp': '2025-10-01 04:20:37.819438', 'step': 3506, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:37.872860', 'step': 3506, 'epoch': 1} {'type': 'loss', 'content': 0.17994984984397888, 'timestamp': '2025-10-01 04:20:37.874666', 'step': 3507, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:37.927454', 'step': 3507, 'epoch': 1} {'type': 'loss', 'content': 0.1545064002275467, 'timestamp': '2025-10-01 04:20:37.932994', 'step': 3508, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:37.985706', 'step': 3508, 'epoch': 1} {'type': 'loss', 'content': 0.17463348805904388, 'timestamp': '2025-10-01 04:20:37.987599', 'step': 3509, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:38.040796', 'step': 3509, 'epoch': 1} {'type': 'loss', 'content': 0.19931884109973907, 'timestamp': '2025-10-01 04:20:38.042876', 'step': 3510, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:38.095465', 'step': 3510, 'epoch': 1} {'type': 'loss', 'content': 0.23597849905490875, 'timestamp': '2025-10-01 04:20:38.097984', 'step': 3511, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:38.151727', 'step': 3511, 'epoch': 1} {'type': 'loss', 'content': 0.16098645329475403, 'timestamp': '2025-10-01 04:20:38.157298', 'step': 3512, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:38.209699', 'step': 3512, 'epoch': 1} {'type': 'loss', 'content': 0.17845171689987183, 'timestamp': '2025-10-01 04:20:38.211936', 'step': 3513, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:38.265244', 'step': 3513, 'epoch': 1} {'type': 'loss', 'content': 0.17715629935264587, 'timestamp': '2025-10-01 04:20:38.267433', 'step': 3514, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:38.322256', 'step': 3514, 'epoch': 1} {'type': 'loss', 'content': 0.2919824421405792, 'timestamp': '2025-10-01 04:20:38.324482', 'step': 3515, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:38.378408', 'step': 3515, 'epoch': 1} {'type': 'loss', 'content': 0.15437452495098114, 'timestamp': '2025-10-01 04:20:38.384232', 'step': 3516, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:38.437237', 'step': 3516, 'epoch': 1} {'type': 'loss', 'content': 0.15768760442733765, 'timestamp': '2025-10-01 04:20:38.439501', 'step': 3517, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:20:38.492938', 'step': 3517, 'epoch': 1} {'type': 'loss', 'content': 0.14812855422496796, 'timestamp': '2025-10-01 04:20:38.495086', 'step': 3518, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:38.550309', 'step': 3518, 'epoch': 1} {'type': 'loss', 'content': 0.20626449584960938, 'timestamp': '2025-10-01 04:20:38.552567', 'step': 3519, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:38.605489', 'step': 3519, 'epoch': 1} {'type': 'loss', 'content': 0.20789223909378052, 'timestamp': '2025-10-01 04:20:38.611476', 'step': 3520, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:38.664169', 'step': 3520, 'epoch': 1} {'type': 'loss', 'content': 0.21829746663570404, 'timestamp': '2025-10-01 04:20:38.666334', 'step': 3521, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:38.720438', 'step': 3521, 'epoch': 1} {'type': 'loss', 'content': 0.1303747147321701, 'timestamp': '2025-10-01 04:20:38.722336', 'step': 3522, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:38.776620', 'step': 3522, 'epoch': 1} {'type': 'loss', 'content': 0.2156326025724411, 'timestamp': '2025-10-01 04:20:38.778453', 'step': 3523, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:38.831712', 'step': 3523, 'epoch': 1} {'type': 'loss', 'content': 0.12606196105480194, 'timestamp': '2025-10-01 04:20:38.837370', 'step': 3524, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:20:38.890604', 'step': 3524, 'epoch': 1} {'type': 'loss', 'content': 0.13710734248161316, 'timestamp': '2025-10-01 04:20:38.892662', 'step': 3525, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:38.946101', 'step': 3525, 'epoch': 1} {'type': 'loss', 'content': 0.1576422154903412, 'timestamp': '2025-10-01 04:20:38.948377', 'step': 3526, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:39.007712', 'step': 3526, 'epoch': 1} {'type': 'loss', 'content': 0.27028515934944153, 'timestamp': '2025-10-01 04:20:39.009670', 'step': 3527, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:20:39.063152', 'step': 3527, 'epoch': 1} {'type': 'loss', 'content': 0.22961534559726715, 'timestamp': '2025-10-01 04:20:39.069440', 'step': 3528, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:39.122358', 'step': 3528, 'epoch': 1} {'type': 'loss', 'content': 0.13188765943050385, 'timestamp': '2025-10-01 04:20:39.128074', 'step': 3529, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:39.182117', 'step': 3529, 'epoch': 1} {'type': 'loss', 'content': 0.19166351854801178, 'timestamp': '2025-10-01 04:20:39.184000', 'step': 3530, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:20:39.237207', 'step': 3530, 'epoch': 1} {'type': 'loss', 'content': 0.2324347198009491, 'timestamp': '2025-10-01 04:20:39.239259', 'step': 3531, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:39.292708', 'step': 3531, 'epoch': 1} {'type': 'loss', 'content': 0.1270872950553894, 'timestamp': '2025-10-01 04:20:39.298576', 'step': 3532, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:39.353380', 'step': 3532, 'epoch': 1} {'type': 'loss', 'content': 0.22332119941711426, 'timestamp': '2025-10-01 04:20:39.355602', 'step': 3533, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:39.410866', 'step': 3533, 'epoch': 1} {'type': 'loss', 'content': 0.1987481564283371, 'timestamp': '2025-10-01 04:20:39.413135', 'step': 3534, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:39.474297', 'step': 3534, 'epoch': 1} {'type': 'loss', 'content': 0.12999002635478973, 'timestamp': '2025-10-01 04:20:39.476456', 'step': 3535, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:39.533007', 'step': 3535, 'epoch': 1} {'type': 'loss', 'content': 0.19549094140529633, 'timestamp': '2025-10-01 04:20:39.539368', 'step': 3536, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:39.594620', 'step': 3536, 'epoch': 1} {'type': 'loss', 'content': 0.1620580106973648, 'timestamp': '2025-10-01 04:20:39.596412', 'step': 3537, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:39.651804', 'step': 3537, 'epoch': 1} {'type': 'loss', 'content': 0.07760004699230194, 'timestamp': '2025-10-01 04:20:39.654396', 'step': 3538, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:39.710251', 'step': 3538, 'epoch': 1} {'type': 'loss', 'content': 0.25239717960357666, 'timestamp': '2025-10-01 04:20:39.712640', 'step': 3539, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:39.769164', 'step': 3539, 'epoch': 1} {'type': 'loss', 'content': 0.24341896176338196, 'timestamp': '2025-10-01 04:20:39.776142', 'step': 3540, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:39.830577', 'step': 3540, 'epoch': 1} {'type': 'loss', 'content': 0.2791731655597687, 'timestamp': '2025-10-01 04:20:39.834371', 'step': 3541, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:39.890157', 'step': 3541, 'epoch': 1} {'type': 'loss', 'content': 0.22525779902935028, 'timestamp': '2025-10-01 04:20:39.896325', 'step': 3542, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:20:39.955311', 'step': 3542, 'epoch': 1} {'type': 'loss', 'content': 0.18246589601039886, 'timestamp': '2025-10-01 04:20:39.957834', 'step': 3543, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:40.015835', 'step': 3543, 'epoch': 1} {'type': 'loss', 'content': 0.12902262806892395, 'timestamp': '2025-10-01 04:20:40.021839', 'step': 3544, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:40.078717', 'step': 3544, 'epoch': 1} {'type': 'loss', 'content': 0.20080940425395966, 'timestamp': '2025-10-01 04:20:40.081070', 'step': 3545, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:40.135799', 'step': 3545, 'epoch': 1} {'type': 'loss', 'content': 0.24282196164131165, 'timestamp': '2025-10-01 04:20:40.138909', 'step': 3546, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:20:40.197820', 'step': 3546, 'epoch': 1} {'type': 'loss', 'content': 0.15603087842464447, 'timestamp': '2025-10-01 04:20:40.200247', 'step': 3547, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:40.260045', 'step': 3547, 'epoch': 1} {'type': 'loss', 'content': 0.0965195968747139, 'timestamp': '2025-10-01 04:20:40.272501', 'step': 3548, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:40.330116', 'step': 3548, 'epoch': 1} {'type': 'loss', 'content': 0.14265380799770355, 'timestamp': '2025-10-01 04:20:40.332522', 'step': 3549, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:40.387579', 'step': 3549, 'epoch': 1} {'type': 'loss', 'content': 0.21426230669021606, 'timestamp': '2025-10-01 04:20:40.389667', 'step': 3550, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:40.445907', 'step': 3550, 'epoch': 1} {'type': 'loss', 'content': 0.12330786138772964, 'timestamp': '2025-10-01 04:20:40.447917', 'step': 3551, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:40.502925', 'step': 3551, 'epoch': 1} {'type': 'loss', 'content': 0.12238376587629318, 'timestamp': '2025-10-01 04:20:40.509390', 'step': 3552, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:40.564672', 'step': 3552, 'epoch': 1} {'type': 'loss', 'content': 0.11995714902877808, 'timestamp': '2025-10-01 04:20:40.567221', 'step': 3553, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:40.624257', 'step': 3553, 'epoch': 1} {'type': 'loss', 'content': 0.10102605074644089, 'timestamp': '2025-10-01 04:20:40.626843', 'step': 3554, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:40.683025', 'step': 3554, 'epoch': 1} {'type': 'loss', 'content': 0.16685451567173004, 'timestamp': '2025-10-01 04:20:40.684992', 'step': 3555, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:40.739582', 'step': 3555, 'epoch': 1} {'type': 'loss', 'content': 0.1799880415201187, 'timestamp': '2025-10-01 04:20:40.747185', 'step': 3556, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:40.803409', 'step': 3556, 'epoch': 1} {'type': 'loss', 'content': 0.15823626518249512, 'timestamp': '2025-10-01 04:20:40.805418', 'step': 3557, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:40.858837', 'step': 3557, 'epoch': 1} {'type': 'loss', 'content': 0.15722927451133728, 'timestamp': '2025-10-01 04:20:40.861541', 'step': 3558, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:40.926098', 'step': 3558, 'epoch': 1} {'type': 'loss', 'content': 0.2601350247859955, 'timestamp': '2025-10-01 04:20:40.928420', 'step': 3559, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:40.988891', 'step': 3559, 'epoch': 1} {'type': 'loss', 'content': 0.090414859354496, 'timestamp': '2025-10-01 04:20:41.004268', 'step': 3560, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:41.060784', 'step': 3560, 'epoch': 1} {'type': 'loss', 'content': 0.2314223200082779, 'timestamp': '2025-10-01 04:20:41.062795', 'step': 3561, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:41.116248', 'step': 3561, 'epoch': 1} {'type': 'loss', 'content': 0.19025065004825592, 'timestamp': '2025-10-01 04:20:41.118470', 'step': 3562, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:20:41.171553', 'step': 3562, 'epoch': 1} {'type': 'loss', 'content': 0.12435448169708252, 'timestamp': '2025-10-01 04:20:41.178017', 'step': 3563, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:41.231960', 'step': 3563, 'epoch': 1} {'type': 'loss', 'content': 0.184370219707489, 'timestamp': '2025-10-01 04:20:41.238373', 'step': 3564, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:41.291157', 'step': 3564, 'epoch': 1} {'type': 'loss', 'content': 0.14257720112800598, 'timestamp': '2025-10-01 04:20:41.293430', 'step': 3565, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:41.347173', 'step': 3565, 'epoch': 1} {'type': 'loss', 'content': 0.24595285952091217, 'timestamp': '2025-10-01 04:20:41.351466', 'step': 3566, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:41.405209', 'step': 3566, 'epoch': 1} {'type': 'loss', 'content': 0.09780734032392502, 'timestamp': '2025-10-01 04:20:41.407396', 'step': 3567, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:41.460118', 'step': 3567, 'epoch': 1} {'type': 'loss', 'content': 0.12550567090511322, 'timestamp': '2025-10-01 04:20:41.465965', 'step': 3568, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:41.521616', 'step': 3568, 'epoch': 1} {'type': 'loss', 'content': 0.21505393087863922, 'timestamp': '2025-10-01 04:20:41.523893', 'step': 3569, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:41.576693', 'step': 3569, 'epoch': 1} {'type': 'loss', 'content': 0.19398923218250275, 'timestamp': '2025-10-01 04:20:41.579058', 'step': 3570, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:20:41.632719', 'step': 3570, 'epoch': 1} {'type': 'loss', 'content': 0.21697814762592316, 'timestamp': '2025-10-01 04:20:41.634851', 'step': 3571, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:41.687447', 'step': 3571, 'epoch': 1} {'type': 'loss', 'content': 0.16234514117240906, 'timestamp': '2025-10-01 04:20:41.693099', 'step': 3572, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:41.745941', 'step': 3572, 'epoch': 1} {'type': 'loss', 'content': 0.15527571737766266, 'timestamp': '2025-10-01 04:20:41.748115', 'step': 3573, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:41.800809', 'step': 3573, 'epoch': 1} {'type': 'loss', 'content': 0.22908464074134827, 'timestamp': '2025-10-01 04:20:41.803172', 'step': 3574, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:20:41.856902', 'step': 3574, 'epoch': 1} {'type': 'loss', 'content': 0.17498596012592316, 'timestamp': '2025-10-01 04:20:41.859300', 'step': 3575, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:41.912315', 'step': 3575, 'epoch': 1} {'type': 'loss', 'content': 0.20739275217056274, 'timestamp': '2025-10-01 04:20:41.917863', 'step': 3576, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:41.970263', 'step': 3576, 'epoch': 1} {'type': 'loss', 'content': 0.23422688245773315, 'timestamp': '2025-10-01 04:20:41.972477', 'step': 3577, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:42.024881', 'step': 3577, 'epoch': 1} {'type': 'loss', 'content': 0.17412760853767395, 'timestamp': '2025-10-01 04:20:42.027071', 'step': 3578, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:42.079589', 'step': 3578, 'epoch': 1} {'type': 'loss', 'content': 0.08196809142827988, 'timestamp': '2025-10-01 04:20:42.081781', 'step': 3579, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:42.135105', 'step': 3579, 'epoch': 1} {'type': 'loss', 'content': 0.10059963166713715, 'timestamp': '2025-10-01 04:20:42.140954', 'step': 3580, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:42.193533', 'step': 3580, 'epoch': 1} {'type': 'loss', 'content': 0.11646082997322083, 'timestamp': '2025-10-01 04:20:42.195680', 'step': 3581, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:42.248674', 'step': 3581, 'epoch': 1} {'type': 'loss', 'content': 0.1468525230884552, 'timestamp': '2025-10-01 04:20:42.250579', 'step': 3582, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:42.303579', 'step': 3582, 'epoch': 1} {'type': 'loss', 'content': 0.17941217124462128, 'timestamp': '2025-10-01 04:20:42.305663', 'step': 3583, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:42.358282', 'step': 3583, 'epoch': 1} {'type': 'loss', 'content': 0.13379265367984772, 'timestamp': '2025-10-01 04:20:42.364010', 'step': 3584, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:42.416338', 'step': 3584, 'epoch': 1} {'type': 'loss', 'content': 0.2116953283548355, 'timestamp': '2025-10-01 04:20:42.418433', 'step': 3585, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:42.471930', 'step': 3585, 'epoch': 1} {'type': 'loss', 'content': 0.2975784242153168, 'timestamp': '2025-10-01 04:20:42.474241', 'step': 3586, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:42.528748', 'step': 3586, 'epoch': 1} {'type': 'loss', 'content': 0.13453327119350433, 'timestamp': '2025-10-01 04:20:42.530900', 'step': 3587, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:42.583875', 'step': 3587, 'epoch': 1} {'type': 'loss', 'content': 0.1395578533411026, 'timestamp': '2025-10-01 04:20:42.590198', 'step': 3588, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:42.643086', 'step': 3588, 'epoch': 1} {'type': 'loss', 'content': 0.16537809371948242, 'timestamp': '2025-10-01 04:20:42.645350', 'step': 3589, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:42.698180', 'step': 3589, 'epoch': 1} {'type': 'loss', 'content': 0.20583021640777588, 'timestamp': '2025-10-01 04:20:42.700419', 'step': 3590, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:42.753440', 'step': 3590, 'epoch': 1} {'type': 'loss', 'content': 0.11383768916130066, 'timestamp': '2025-10-01 04:20:42.755565', 'step': 3591, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:42.808684', 'step': 3591, 'epoch': 1} {'type': 'loss', 'content': 0.13872669637203217, 'timestamp': '2025-10-01 04:20:42.814398', 'step': 3592, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:42.867778', 'step': 3592, 'epoch': 1} {'type': 'loss', 'content': 0.1605582982301712, 'timestamp': '2025-10-01 04:20:42.870273', 'step': 3593, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:42.924570', 'step': 3593, 'epoch': 1} {'type': 'loss', 'content': 0.19169440865516663, 'timestamp': '2025-10-01 04:20:42.927102', 'step': 3594, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:42.980837', 'step': 3594, 'epoch': 1} {'type': 'loss', 'content': 0.15813922882080078, 'timestamp': '2025-10-01 04:20:42.983344', 'step': 3595, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:43.038494', 'step': 3595, 'epoch': 1} {'type': 'loss', 'content': 0.11488589644432068, 'timestamp': '2025-10-01 04:20:43.044750', 'step': 3596, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:43.098866', 'step': 3596, 'epoch': 1} {'type': 'loss', 'content': 0.21530045568943024, 'timestamp': '2025-10-01 04:20:43.101155', 'step': 3597, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:20:43.154828', 'step': 3597, 'epoch': 1} {'type': 'loss', 'content': 0.13178417086601257, 'timestamp': '2025-10-01 04:20:43.156961', 'step': 3598, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:43.210707', 'step': 3598, 'epoch': 1} {'type': 'loss', 'content': 0.14514859020709991, 'timestamp': '2025-10-01 04:20:43.212906', 'step': 3599, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:43.265966', 'step': 3599, 'epoch': 1} {'type': 'loss', 'content': 0.20611314475536346, 'timestamp': '2025-10-01 04:20:43.271669', 'step': 3600, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:43.324041', 'step': 3600, 'epoch': 1} {'type': 'loss', 'content': 0.1917467713356018, 'timestamp': '2025-10-01 04:20:43.326292', 'step': 3601, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:43.379319', 'step': 3601, 'epoch': 1} {'type': 'loss', 'content': 0.1222616508603096, 'timestamp': '2025-10-01 04:20:43.382233', 'step': 3602, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:43.435942', 'step': 3602, 'epoch': 1} {'type': 'loss', 'content': 0.11716313660144806, 'timestamp': '2025-10-01 04:20:43.438337', 'step': 3603, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:43.491316', 'step': 3603, 'epoch': 1} {'type': 'loss', 'content': 0.16855370998382568, 'timestamp': '2025-10-01 04:20:43.497035', 'step': 3604, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:43.551117', 'step': 3604, 'epoch': 1} {'type': 'loss', 'content': 0.10644084960222244, 'timestamp': '2025-10-01 04:20:43.554238', 'step': 3605, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:43.607471', 'step': 3605, 'epoch': 1} {'type': 'loss', 'content': 0.16187629103660583, 'timestamp': '2025-10-01 04:20:43.609584', 'step': 3606, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:43.662956', 'step': 3606, 'epoch': 1} {'type': 'loss', 'content': 0.2607954442501068, 'timestamp': '2025-10-01 04:20:43.665155', 'step': 3607, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:43.717968', 'step': 3607, 'epoch': 1} {'type': 'loss', 'content': 0.25455793738365173, 'timestamp': '2025-10-01 04:20:43.723716', 'step': 3608, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:43.776174', 'step': 3608, 'epoch': 1} {'type': 'loss', 'content': 0.17850755155086517, 'timestamp': '2025-10-01 04:20:43.778468', 'step': 3609, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:43.831394', 'step': 3609, 'epoch': 1} {'type': 'loss', 'content': 0.14179793000221252, 'timestamp': '2025-10-01 04:20:43.833929', 'step': 3610, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:43.887131', 'step': 3610, 'epoch': 1} {'type': 'loss', 'content': 0.23461930453777313, 'timestamp': '2025-10-01 04:20:43.889561', 'step': 3611, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:43.942743', 'step': 3611, 'epoch': 1} {'type': 'loss', 'content': 0.24309463798999786, 'timestamp': '2025-10-01 04:20:43.948464', 'step': 3612, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:20:44.001620', 'step': 3612, 'epoch': 1} {'type': 'loss', 'content': 0.1351698786020279, 'timestamp': '2025-10-01 04:20:44.003820', 'step': 3613, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:44.056634', 'step': 3613, 'epoch': 1} {'type': 'loss', 'content': 0.21231567859649658, 'timestamp': '2025-10-01 04:20:44.058952', 'step': 3614, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:44.113124', 'step': 3614, 'epoch': 1} {'type': 'loss', 'content': 0.09081023931503296, 'timestamp': '2025-10-01 04:20:44.115940', 'step': 3615, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:44.171018', 'step': 3615, 'epoch': 1} {'type': 'loss', 'content': 0.1989462673664093, 'timestamp': '2025-10-01 04:20:44.177440', 'step': 3616, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:20:44.231425', 'step': 3616, 'epoch': 1} {'type': 'loss', 'content': 0.1281224489212036, 'timestamp': '2025-10-01 04:20:44.234387', 'step': 3617, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:44.288777', 'step': 3617, 'epoch': 1} {'type': 'loss', 'content': 0.17435526847839355, 'timestamp': '2025-10-01 04:20:44.291823', 'step': 3618, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:44.355017', 'step': 3618, 'epoch': 1} {'type': 'loss', 'content': 0.18600934743881226, 'timestamp': '2025-10-01 04:20:44.357209', 'step': 3619, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:44.411174', 'step': 3619, 'epoch': 1} {'type': 'loss', 'content': 0.2267448604106903, 'timestamp': '2025-10-01 04:20:44.417498', 'step': 3620, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:44.471102', 'step': 3620, 'epoch': 1} {'type': 'loss', 'content': 0.18097074329853058, 'timestamp': '2025-10-01 04:20:44.473363', 'step': 3621, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:20:44.528430', 'step': 3621, 'epoch': 1} {'type': 'loss', 'content': 0.13215585052967072, 'timestamp': '2025-10-01 04:20:44.530755', 'step': 3622, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:44.585833', 'step': 3622, 'epoch': 1} {'type': 'loss', 'content': 0.14796991646289825, 'timestamp': '2025-10-01 04:20:44.587600', 'step': 3623, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:44.641786', 'step': 3623, 'epoch': 1} {'type': 'loss', 'content': 0.2795731723308563, 'timestamp': '2025-10-01 04:20:44.647742', 'step': 3624, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:44.700473', 'step': 3624, 'epoch': 1} {'type': 'loss', 'content': 0.1967497169971466, 'timestamp': '2025-10-01 04:20:44.702565', 'step': 3625, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:44.755942', 'step': 3625, 'epoch': 1} {'type': 'loss', 'content': 0.09289560467004776, 'timestamp': '2025-10-01 04:20:44.758032', 'step': 3626, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:44.811173', 'step': 3626, 'epoch': 1} {'type': 'loss', 'content': 0.1394428312778473, 'timestamp': '2025-10-01 04:20:44.814262', 'step': 3627, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:44.867195', 'step': 3627, 'epoch': 1} {'type': 'loss', 'content': 0.14691241085529327, 'timestamp': '2025-10-01 04:20:44.873154', 'step': 3628, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:44.925507', 'step': 3628, 'epoch': 1} {'type': 'loss', 'content': 0.2452753484249115, 'timestamp': '2025-10-01 04:20:44.927788', 'step': 3629, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:44.980915', 'step': 3629, 'epoch': 1} {'type': 'loss', 'content': 0.18846702575683594, 'timestamp': '2025-10-01 04:20:44.983412', 'step': 3630, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:45.037573', 'step': 3630, 'epoch': 1} {'type': 'loss', 'content': 0.1852593719959259, 'timestamp': '2025-10-01 04:20:45.040003', 'step': 3631, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:45.093722', 'step': 3631, 'epoch': 1} {'type': 'loss', 'content': 0.1920827180147171, 'timestamp': '2025-10-01 04:20:45.100519', 'step': 3632, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:45.154350', 'step': 3632, 'epoch': 1} {'type': 'loss', 'content': 0.26781097054481506, 'timestamp': '2025-10-01 04:20:45.156982', 'step': 3633, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:45.210995', 'step': 3633, 'epoch': 1} {'type': 'loss', 'content': 0.16667810082435608, 'timestamp': '2025-10-01 04:20:45.212949', 'step': 3634, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:20:45.266003', 'step': 3634, 'epoch': 1} {'type': 'loss', 'content': 0.10686659067869186, 'timestamp': '2025-10-01 04:20:45.268110', 'step': 3635, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:45.321354', 'step': 3635, 'epoch': 1} {'type': 'loss', 'content': 0.23188434541225433, 'timestamp': '2025-10-01 04:20:45.327299', 'step': 3636, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:20:45.380728', 'step': 3636, 'epoch': 1} {'type': 'loss', 'content': 0.17363141477108002, 'timestamp': '2025-10-01 04:20:45.382858', 'step': 3637, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:45.436330', 'step': 3637, 'epoch': 1} {'type': 'loss', 'content': 0.15074412524700165, 'timestamp': '2025-10-01 04:20:45.438443', 'step': 3638, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:45.491611', 'step': 3638, 'epoch': 1} {'type': 'loss', 'content': 0.1686275750398636, 'timestamp': '2025-10-01 04:20:45.493550', 'step': 3639, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:45.546239', 'step': 3639, 'epoch': 1} {'type': 'loss', 'content': 0.2038152664899826, 'timestamp': '2025-10-01 04:20:45.552001', 'step': 3640, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:45.605783', 'step': 3640, 'epoch': 1} {'type': 'loss', 'content': 0.23244209587574005, 'timestamp': '2025-10-01 04:20:45.615910', 'step': 3641, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:45.671482', 'step': 3641, 'epoch': 1} {'type': 'loss', 'content': 0.18250812590122223, 'timestamp': '2025-10-01 04:20:45.673556', 'step': 3642, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:45.727852', 'step': 3642, 'epoch': 1} {'type': 'loss', 'content': 0.15762244164943695, 'timestamp': '2025-10-01 04:20:45.730059', 'step': 3643, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:45.783235', 'step': 3643, 'epoch': 1} {'type': 'loss', 'content': 0.12559908628463745, 'timestamp': '2025-10-01 04:20:45.793289', 'step': 3644, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:45.846774', 'step': 3644, 'epoch': 1} {'type': 'loss', 'content': 0.12320956587791443, 'timestamp': '2025-10-01 04:20:45.848858', 'step': 3645, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:45.902454', 'step': 3645, 'epoch': 1} {'type': 'loss', 'content': 0.1419486701488495, 'timestamp': '2025-10-01 04:20:45.904783', 'step': 3646, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:45.958367', 'step': 3646, 'epoch': 1} {'type': 'loss', 'content': 0.16007056832313538, 'timestamp': '2025-10-01 04:20:45.961302', 'step': 3647, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:46.014573', 'step': 3647, 'epoch': 1} {'type': 'loss', 'content': 0.2576868534088135, 'timestamp': '2025-10-01 04:20:46.020765', 'step': 3648, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:46.078114', 'step': 3648, 'epoch': 1} {'type': 'loss', 'content': 0.18999691307544708, 'timestamp': '2025-10-01 04:20:46.080096', 'step': 3649, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:46.134452', 'step': 3649, 'epoch': 1} {'type': 'loss', 'content': 0.28603771328926086, 'timestamp': '2025-10-01 04:20:46.136902', 'step': 3650, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:46.190243', 'step': 3650, 'epoch': 1} {'type': 'loss', 'content': 0.14979369938373566, 'timestamp': '2025-10-01 04:20:46.192415', 'step': 3651, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-01 04:20:46.264402', 'step': 3651, 'epoch': 1} {'type': 'loss', 'content': 0.12166374176740646, 'timestamp': '2025-10-01 04:20:46.272091', 'step': 3652, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:46.324844', 'step': 3652, 'epoch': 1} {'type': 'loss', 'content': 0.14123941957950592, 'timestamp': '2025-10-01 04:20:46.326908', 'step': 3653, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:46.380183', 'step': 3653, 'epoch': 1} {'type': 'loss', 'content': 0.1386410892009735, 'timestamp': '2025-10-01 04:20:46.383252', 'step': 3654, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:46.439977', 'step': 3654, 'epoch': 1} {'type': 'loss', 'content': 0.1275184154510498, 'timestamp': '2025-10-01 04:20:46.442405', 'step': 3655, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:46.495193', 'step': 3655, 'epoch': 1} {'type': 'loss', 'content': 0.16343402862548828, 'timestamp': '2025-10-01 04:20:46.501026', 'step': 3656, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:46.553265', 'step': 3656, 'epoch': 1} {'type': 'loss', 'content': 0.1953880935907364, 'timestamp': '2025-10-01 04:20:46.555294', 'step': 3657, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:46.608569', 'step': 3657, 'epoch': 1} {'type': 'loss', 'content': 0.16138960421085358, 'timestamp': '2025-10-01 04:20:46.610598', 'step': 3658, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:46.664656', 'step': 3658, 'epoch': 1} {'type': 'loss', 'content': 0.14434315264225006, 'timestamp': '2025-10-01 04:20:46.673489', 'step': 3659, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:46.730200', 'step': 3659, 'epoch': 1} {'type': 'loss', 'content': 0.31755155324935913, 'timestamp': '2025-10-01 04:20:46.737453', 'step': 3660, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:46.793654', 'step': 3660, 'epoch': 1} {'type': 'loss', 'content': 0.17647786438465118, 'timestamp': '2025-10-01 04:20:46.795863', 'step': 3661, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:46.859211', 'step': 3661, 'epoch': 1} {'type': 'loss', 'content': 0.2105541080236435, 'timestamp': '2025-10-01 04:20:46.861278', 'step': 3662, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:46.914390', 'step': 3662, 'epoch': 1} {'type': 'loss', 'content': 0.2714024782180786, 'timestamp': '2025-10-01 04:20:46.916995', 'step': 3663, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:46.969784', 'step': 3663, 'epoch': 1} {'type': 'loss', 'content': 0.1643444299697876, 'timestamp': '2025-10-01 04:20:46.975493', 'step': 3664, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:20:47.031179', 'step': 3664, 'epoch': 1} {'type': 'loss', 'content': 0.14655765891075134, 'timestamp': '2025-10-01 04:20:47.036765', 'step': 3665, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:47.089496', 'step': 3665, 'epoch': 1} {'type': 'loss', 'content': 0.2096792310476303, 'timestamp': '2025-10-01 04:20:47.091851', 'step': 3666, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:47.144864', 'step': 3666, 'epoch': 1} {'type': 'loss', 'content': 0.20421554148197174, 'timestamp': '2025-10-01 04:20:47.148042', 'step': 3667, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:47.202404', 'step': 3667, 'epoch': 1} {'type': 'loss', 'content': 0.1642228662967682, 'timestamp': '2025-10-01 04:20:47.215180', 'step': 3668, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:47.267442', 'step': 3668, 'epoch': 1} {'type': 'loss', 'content': 0.13651490211486816, 'timestamp': '2025-10-01 04:20:47.269505', 'step': 3669, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:47.324363', 'step': 3669, 'epoch': 1} {'type': 'loss', 'content': 0.16351395845413208, 'timestamp': '2025-10-01 04:20:47.326277', 'step': 3670, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:47.379094', 'step': 3670, 'epoch': 1} {'type': 'loss', 'content': 0.16615621745586395, 'timestamp': '2025-10-01 04:20:47.381285', 'step': 3671, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:47.445453', 'step': 3671, 'epoch': 1} {'type': 'loss', 'content': 0.25750941038131714, 'timestamp': '2025-10-01 04:20:47.450872', 'step': 3672, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:47.505351', 'step': 3672, 'epoch': 1} {'type': 'loss', 'content': 0.12085535377264023, 'timestamp': '2025-10-01 04:20:47.507394', 'step': 3673, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:47.560175', 'step': 3673, 'epoch': 1} {'type': 'loss', 'content': 0.1652456372976303, 'timestamp': '2025-10-01 04:20:47.562049', 'step': 3674, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:47.615421', 'step': 3674, 'epoch': 1} {'type': 'loss', 'content': 0.273211270570755, 'timestamp': '2025-10-01 04:20:47.621295', 'step': 3675, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:47.679221', 'step': 3675, 'epoch': 1} {'type': 'loss', 'content': 0.08378585427999496, 'timestamp': '2025-10-01 04:20:47.685155', 'step': 3676, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:47.738096', 'step': 3676, 'epoch': 1} {'type': 'loss', 'content': 0.18754620850086212, 'timestamp': '2025-10-01 04:20:47.740271', 'step': 3677, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:47.793211', 'step': 3677, 'epoch': 1} {'type': 'loss', 'content': 0.13867169618606567, 'timestamp': '2025-10-01 04:20:47.799303', 'step': 3678, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:47.852715', 'step': 3678, 'epoch': 1} {'type': 'loss', 'content': 0.14361703395843506, 'timestamp': '2025-10-01 04:20:47.854895', 'step': 3679, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:47.908301', 'step': 3679, 'epoch': 1} {'type': 'loss', 'content': 0.21096493303775787, 'timestamp': '2025-10-01 04:20:47.913900', 'step': 3680, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:20:47.966998', 'step': 3680, 'epoch': 1} {'type': 'loss', 'content': 0.1670003980398178, 'timestamp': '2025-10-01 04:20:47.968915', 'step': 3681, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:48.021881', 'step': 3681, 'epoch': 1} {'type': 'loss', 'content': 0.11588738858699799, 'timestamp': '2025-10-01 04:20:48.025363', 'step': 3682, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:48.078494', 'step': 3682, 'epoch': 1} {'type': 'loss', 'content': 0.17057451605796814, 'timestamp': '2025-10-01 04:20:48.080699', 'step': 3683, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:48.133307', 'step': 3683, 'epoch': 1} {'type': 'loss', 'content': 0.15810948610305786, 'timestamp': '2025-10-01 04:20:48.139270', 'step': 3684, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:48.191615', 'step': 3684, 'epoch': 1} {'type': 'loss', 'content': 0.13583169877529144, 'timestamp': '2025-10-01 04:20:48.193777', 'step': 3685, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:48.247091', 'step': 3685, 'epoch': 1} {'type': 'loss', 'content': 0.30115073919296265, 'timestamp': '2025-10-01 04:20:48.248919', 'step': 3686, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:48.301615', 'step': 3686, 'epoch': 1} {'type': 'loss', 'content': 0.19608920812606812, 'timestamp': '2025-10-01 04:20:48.303667', 'step': 3687, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:48.357691', 'step': 3687, 'epoch': 1} {'type': 'loss', 'content': 0.22549040615558624, 'timestamp': '2025-10-01 04:20:48.363179', 'step': 3688, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:48.415397', 'step': 3688, 'epoch': 1} {'type': 'loss', 'content': 0.18703825771808624, 'timestamp': '2025-10-01 04:20:48.417748', 'step': 3689, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:48.470551', 'step': 3689, 'epoch': 1} {'type': 'loss', 'content': 0.23206602036952972, 'timestamp': '2025-10-01 04:20:48.472680', 'step': 3690, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:48.526393', 'step': 3690, 'epoch': 1} {'type': 'loss', 'content': 0.17646928131580353, 'timestamp': '2025-10-01 04:20:48.528549', 'step': 3691, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:48.581301', 'step': 3691, 'epoch': 1} {'type': 'loss', 'content': 0.14032818377017975, 'timestamp': '2025-10-01 04:20:48.586972', 'step': 3692, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:48.639571', 'step': 3692, 'epoch': 1} {'type': 'loss', 'content': 0.1577989012002945, 'timestamp': '2025-10-01 04:20:48.641591', 'step': 3693, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:48.694458', 'step': 3693, 'epoch': 1} {'type': 'loss', 'content': 0.2568471133708954, 'timestamp': '2025-10-01 04:20:48.696455', 'step': 3694, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:48.763799', 'step': 3694, 'epoch': 1} {'type': 'loss', 'content': 0.2567180097103119, 'timestamp': '2025-10-01 04:20:48.765812', 'step': 3695, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:48.818566', 'step': 3695, 'epoch': 1} {'type': 'loss', 'content': 0.1278638392686844, 'timestamp': '2025-10-01 04:20:48.824125', 'step': 3696, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:20:48.876686', 'step': 3696, 'epoch': 1} {'type': 'loss', 'content': 0.08578328043222427, 'timestamp': '2025-10-01 04:20:48.878895', 'step': 3697, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:48.931597', 'step': 3697, 'epoch': 1} {'type': 'loss', 'content': 0.19810685515403748, 'timestamp': '2025-10-01 04:20:48.933827', 'step': 3698, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:48.987110', 'step': 3698, 'epoch': 1} {'type': 'loss', 'content': 0.12579578161239624, 'timestamp': '2025-10-01 04:20:48.989096', 'step': 3699, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:49.041743', 'step': 3699, 'epoch': 1} {'type': 'loss', 'content': 0.0815126821398735, 'timestamp': '2025-10-01 04:20:49.047382', 'step': 3700, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:49.099646', 'step': 3700, 'epoch': 1} {'type': 'loss', 'content': 0.20545820891857147, 'timestamp': '2025-10-01 04:20:49.101601', 'step': 3701, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:49.155372', 'step': 3701, 'epoch': 1} {'type': 'loss', 'content': 0.16059903800487518, 'timestamp': '2025-10-01 04:20:49.157451', 'step': 3702, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:49.210358', 'step': 3702, 'epoch': 1} {'type': 'loss', 'content': 0.20031248033046722, 'timestamp': '2025-10-01 04:20:49.212563', 'step': 3703, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:49.265448', 'step': 3703, 'epoch': 1} {'type': 'loss', 'content': 0.24955454468727112, 'timestamp': '2025-10-01 04:20:49.271029', 'step': 3704, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:49.323365', 'step': 3704, 'epoch': 1} {'type': 'loss', 'content': 0.13738059997558594, 'timestamp': '2025-10-01 04:20:49.326018', 'step': 3705, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:49.378923', 'step': 3705, 'epoch': 1} {'type': 'loss', 'content': 0.14058317244052887, 'timestamp': '2025-10-01 04:20:49.380965', 'step': 3706, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:49.434338', 'step': 3706, 'epoch': 1} {'type': 'loss', 'content': 0.21854743361473083, 'timestamp': '2025-10-01 04:20:49.436408', 'step': 3707, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:49.489669', 'step': 3707, 'epoch': 1} {'type': 'loss', 'content': 0.20225171744823456, 'timestamp': '2025-10-01 04:20:49.495570', 'step': 3708, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:49.552592', 'step': 3708, 'epoch': 1} {'type': 'loss', 'content': 0.15433970093727112, 'timestamp': '2025-10-01 04:20:49.556242', 'step': 3709, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:20:49.617395', 'step': 3709, 'epoch': 1} {'type': 'loss', 'content': 0.14437155425548553, 'timestamp': '2025-10-01 04:20:49.619449', 'step': 3710, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:49.672564', 'step': 3710, 'epoch': 1} {'type': 'loss', 'content': 0.237811878323555, 'timestamp': '2025-10-01 04:20:49.674704', 'step': 3711, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:49.727481', 'step': 3711, 'epoch': 1} {'type': 'loss', 'content': 0.13984371721744537, 'timestamp': '2025-10-01 04:20:49.733170', 'step': 3712, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:49.785760', 'step': 3712, 'epoch': 1} {'type': 'loss', 'content': 0.2559190094470978, 'timestamp': '2025-10-01 04:20:49.787754', 'step': 3713, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:49.840494', 'step': 3713, 'epoch': 1} {'type': 'loss', 'content': 0.15707512199878693, 'timestamp': '2025-10-01 04:20:49.842696', 'step': 3714, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:20:49.895833', 'step': 3714, 'epoch': 1} {'type': 'loss', 'content': 0.23640130460262299, 'timestamp': '2025-10-01 04:20:49.897911', 'step': 3715, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:49.954108', 'step': 3715, 'epoch': 1} {'type': 'loss', 'content': 0.27859222888946533, 'timestamp': '2025-10-01 04:20:49.960042', 'step': 3716, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:50.012368', 'step': 3716, 'epoch': 1} {'type': 'loss', 'content': 0.13478867709636688, 'timestamp': '2025-10-01 04:20:50.014487', 'step': 3717, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:50.072720', 'step': 3717, 'epoch': 1} {'type': 'loss', 'content': 0.18988388776779175, 'timestamp': '2025-10-01 04:20:50.077262', 'step': 3718, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:50.134523', 'step': 3718, 'epoch': 1} {'type': 'loss', 'content': 0.17545154690742493, 'timestamp': '2025-10-01 04:20:50.137889', 'step': 3719, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:50.191298', 'step': 3719, 'epoch': 1} {'type': 'loss', 'content': 0.14131885766983032, 'timestamp': '2025-10-01 04:20:50.196910', 'step': 3720, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:20:50.249400', 'step': 3720, 'epoch': 1} {'type': 'loss', 'content': 0.1752220094203949, 'timestamp': '2025-10-01 04:20:50.254930', 'step': 3721, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:50.308015', 'step': 3721, 'epoch': 1} {'type': 'loss', 'content': 0.11006532609462738, 'timestamp': '2025-10-01 04:20:50.314376', 'step': 3722, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:50.368729', 'step': 3722, 'epoch': 1} {'type': 'loss', 'content': 0.20377525687217712, 'timestamp': '2025-10-01 04:20:50.370668', 'step': 3723, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:50.423323', 'step': 3723, 'epoch': 1} {'type': 'loss', 'content': 0.21274954080581665, 'timestamp': '2025-10-01 04:20:50.428949', 'step': 3724, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:50.481235', 'step': 3724, 'epoch': 1} {'type': 'loss', 'content': 0.13725648820400238, 'timestamp': '2025-10-01 04:20:50.483377', 'step': 3725, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:20:50.545283', 'step': 3725, 'epoch': 1} {'type': 'loss', 'content': 0.17805488407611847, 'timestamp': '2025-10-01 04:20:50.553065', 'step': 3726, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:20:50.609702', 'step': 3726, 'epoch': 1} {'type': 'loss', 'content': 0.16412213444709778, 'timestamp': '2025-10-01 04:20:50.611845', 'step': 3727, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:20:50.669723', 'step': 3727, 'epoch': 1} {'type': 'loss', 'content': 0.14902089536190033, 'timestamp': '2025-10-01 04:20:50.675339', 'step': 3728, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-01 04:21:03.588562', 'step': 3728, 'epoch': 1} {'type': 'pplx', 'content': 11517.87189812397, 'timestamp': '2025-10-01 04:21:03.591636', 'step': 3728, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:03.644732', 'step': 3728, 'epoch': 1} {'type': 'loss', 'content': 0.12442757934331894, 'timestamp': '2025-10-01 04:21:03.646904', 'step': 3729, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:03.699852', 'step': 3729, 'epoch': 1} {'type': 'loss', 'content': 0.1947082281112671, 'timestamp': '2025-10-01 04:21:03.702442', 'step': 3730, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:03.755499', 'step': 3730, 'epoch': 1} {'type': 'loss', 'content': 0.16914361715316772, 'timestamp': '2025-10-01 04:21:03.757677', 'step': 3731, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:03.810588', 'step': 3731, 'epoch': 1} {'type': 'loss', 'content': 0.23415935039520264, 'timestamp': '2025-10-01 04:21:03.816697', 'step': 3732, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:03.869677', 'step': 3732, 'epoch': 1} {'type': 'loss', 'content': 0.17882265150547028, 'timestamp': '2025-10-01 04:21:03.872117', 'step': 3733, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:03.925379', 'step': 3733, 'epoch': 1} {'type': 'loss', 'content': 0.21438784897327423, 'timestamp': '2025-10-01 04:21:03.927328', 'step': 3734, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:21:03.981186', 'step': 3734, 'epoch': 1} {'type': 'loss', 'content': 0.188007652759552, 'timestamp': '2025-10-01 04:21:03.983301', 'step': 3735, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:04.036643', 'step': 3735, 'epoch': 1} {'type': 'loss', 'content': 0.22761906683444977, 'timestamp': '2025-10-01 04:21:04.042454', 'step': 3736, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:04.095355', 'step': 3736, 'epoch': 1} {'type': 'loss', 'content': 0.15477345883846283, 'timestamp': '2025-10-01 04:21:04.097340', 'step': 3737, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:04.151225', 'step': 3737, 'epoch': 1} {'type': 'loss', 'content': 0.09106551110744476, 'timestamp': '2025-10-01 04:21:04.153498', 'step': 3738, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:04.206141', 'step': 3738, 'epoch': 1} {'type': 'loss', 'content': 0.16622333228588104, 'timestamp': '2025-10-01 04:21:04.208156', 'step': 3739, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:04.260022', 'step': 3739, 'epoch': 1} {'type': 'loss', 'content': 0.2001553773880005, 'timestamp': '2025-10-01 04:21:04.265985', 'step': 3740, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:04.318383', 'step': 3740, 'epoch': 1} {'type': 'loss', 'content': 0.21434316039085388, 'timestamp': '2025-10-01 04:21:04.322317', 'step': 3741, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:04.375689', 'step': 3741, 'epoch': 1} {'type': 'loss', 'content': 0.10911174863576889, 'timestamp': '2025-10-01 04:21:04.380918', 'step': 3742, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:04.434088', 'step': 3742, 'epoch': 1} {'type': 'loss', 'content': 0.166838139295578, 'timestamp': '2025-10-01 04:21:04.436302', 'step': 3743, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:04.488866', 'step': 3743, 'epoch': 1} {'type': 'loss', 'content': 0.20905351638793945, 'timestamp': '2025-10-01 04:21:04.494696', 'step': 3744, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:04.547577', 'step': 3744, 'epoch': 1} {'type': 'loss', 'content': 0.16083256900310516, 'timestamp': '2025-10-01 04:21:04.549827', 'step': 3745, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:04.602572', 'step': 3745, 'epoch': 1} {'type': 'loss', 'content': 0.1409866064786911, 'timestamp': '2025-10-01 04:21:04.604931', 'step': 3746, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:04.657189', 'step': 3746, 'epoch': 1} {'type': 'loss', 'content': 0.13365410268306732, 'timestamp': '2025-10-01 04:21:04.659299', 'step': 3747, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:04.720797', 'step': 3747, 'epoch': 1} {'type': 'loss', 'content': 0.15818549692630768, 'timestamp': '2025-10-01 04:21:04.726533', 'step': 3748, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:04.779754', 'step': 3748, 'epoch': 1} {'type': 'loss', 'content': 0.24426676332950592, 'timestamp': '2025-10-01 04:21:04.782006', 'step': 3749, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:04.834908', 'step': 3749, 'epoch': 1} {'type': 'loss', 'content': 0.17898587882518768, 'timestamp': '2025-10-01 04:21:04.837007', 'step': 3750, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:04.889061', 'step': 3750, 'epoch': 1} {'type': 'loss', 'content': 0.18509423732757568, 'timestamp': '2025-10-01 04:21:04.891296', 'step': 3751, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:04.944212', 'step': 3751, 'epoch': 1} {'type': 'loss', 'content': 0.0834900438785553, 'timestamp': '2025-10-01 04:21:04.950067', 'step': 3752, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:05.001466', 'step': 3752, 'epoch': 1} {'type': 'loss', 'content': 0.20778539776802063, 'timestamp': '2025-10-01 04:21:05.003689', 'step': 3753, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:05.055947', 'step': 3753, 'epoch': 1} {'type': 'loss', 'content': 0.1415003538131714, 'timestamp': '2025-10-01 04:21:05.057965', 'step': 3754, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:05.110216', 'step': 3754, 'epoch': 1} {'type': 'loss', 'content': 0.13699297606945038, 'timestamp': '2025-10-01 04:21:05.112573', 'step': 3755, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:05.165197', 'step': 3755, 'epoch': 1} {'type': 'loss', 'content': 0.20257458090782166, 'timestamp': '2025-10-01 04:21:05.171438', 'step': 3756, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:05.225006', 'step': 3756, 'epoch': 1} {'type': 'loss', 'content': 0.15219710767269135, 'timestamp': '2025-10-01 04:21:05.228213', 'step': 3757, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:21:05.283528', 'step': 3757, 'epoch': 1} {'type': 'loss', 'content': 0.24655167758464813, 'timestamp': '2025-10-01 04:21:05.285803', 'step': 3758, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:05.338309', 'step': 3758, 'epoch': 1} {'type': 'loss', 'content': 0.11127965152263641, 'timestamp': '2025-10-01 04:21:05.353013', 'step': 3759, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:05.406021', 'step': 3759, 'epoch': 1} {'type': 'loss', 'content': 0.08431056886911392, 'timestamp': '2025-10-01 04:21:05.414515', 'step': 3760, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:05.469700', 'step': 3760, 'epoch': 1} {'type': 'loss', 'content': 0.13888363540172577, 'timestamp': '2025-10-01 04:21:05.472461', 'step': 3761, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:21:05.526002', 'step': 3761, 'epoch': 1} {'type': 'loss', 'content': 0.11888281255960464, 'timestamp': '2025-10-01 04:21:05.528406', 'step': 3762, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:05.581254', 'step': 3762, 'epoch': 1} {'type': 'loss', 'content': 0.1394806206226349, 'timestamp': '2025-10-01 04:21:05.583366', 'step': 3763, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:05.636329', 'step': 3763, 'epoch': 1} {'type': 'loss', 'content': 0.1423075795173645, 'timestamp': '2025-10-01 04:21:05.641964', 'step': 3764, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:21:05.693631', 'step': 3764, 'epoch': 1} {'type': 'loss', 'content': 0.11499831080436707, 'timestamp': '2025-10-01 04:21:05.695866', 'step': 3765, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:05.748698', 'step': 3765, 'epoch': 1} {'type': 'loss', 'content': 0.07787897437810898, 'timestamp': '2025-10-01 04:21:05.751028', 'step': 3766, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:05.803615', 'step': 3766, 'epoch': 1} {'type': 'loss', 'content': 0.12577056884765625, 'timestamp': '2025-10-01 04:21:05.805897', 'step': 3767, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:05.859052', 'step': 3767, 'epoch': 1} {'type': 'loss', 'content': 0.18549400568008423, 'timestamp': '2025-10-01 04:21:05.864869', 'step': 3768, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:05.916813', 'step': 3768, 'epoch': 1} {'type': 'loss', 'content': 0.19131332635879517, 'timestamp': '2025-10-01 04:21:05.918935', 'step': 3769, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:21:05.971258', 'step': 3769, 'epoch': 1} {'type': 'loss', 'content': 0.12488359212875366, 'timestamp': '2025-10-01 04:21:05.973713', 'step': 3770, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:06.028926', 'step': 3770, 'epoch': 1} {'type': 'loss', 'content': 0.14634747803211212, 'timestamp': '2025-10-01 04:21:06.031041', 'step': 3771, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:06.083486', 'step': 3771, 'epoch': 1} {'type': 'loss', 'content': 0.12417694181203842, 'timestamp': '2025-10-01 04:21:06.089265', 'step': 3772, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:06.140890', 'step': 3772, 'epoch': 1} {'type': 'loss', 'content': 0.1834392100572586, 'timestamp': '2025-10-01 04:21:06.142994', 'step': 3773, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:06.195335', 'step': 3773, 'epoch': 1} {'type': 'loss', 'content': 0.2158253937959671, 'timestamp': '2025-10-01 04:21:06.197529', 'step': 3774, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:06.250234', 'step': 3774, 'epoch': 1} {'type': 'loss', 'content': 0.06683745980262756, 'timestamp': '2025-10-01 04:21:06.252500', 'step': 3775, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:06.305929', 'step': 3775, 'epoch': 1} {'type': 'loss', 'content': 0.1566823571920395, 'timestamp': '2025-10-01 04:21:06.311654', 'step': 3776, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:06.363888', 'step': 3776, 'epoch': 1} {'type': 'loss', 'content': 0.18685489892959595, 'timestamp': '2025-10-01 04:21:06.365984', 'step': 3777, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:06.418194', 'step': 3777, 'epoch': 1} {'type': 'loss', 'content': 0.20410291850566864, 'timestamp': '2025-10-01 04:21:06.420433', 'step': 3778, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:06.472750', 'step': 3778, 'epoch': 1} {'type': 'loss', 'content': 0.13840778172016144, 'timestamp': '2025-10-01 04:21:06.475146', 'step': 3779, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:06.527596', 'step': 3779, 'epoch': 1} {'type': 'loss', 'content': 0.1442483514547348, 'timestamp': '2025-10-01 04:21:06.533787', 'step': 3780, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:06.586881', 'step': 3780, 'epoch': 1} {'type': 'loss', 'content': 0.14513294398784637, 'timestamp': '2025-10-01 04:21:06.589088', 'step': 3781, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:06.641308', 'step': 3781, 'epoch': 1} {'type': 'loss', 'content': 0.1031176745891571, 'timestamp': '2025-10-01 04:21:06.643531', 'step': 3782, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:06.696184', 'step': 3782, 'epoch': 1} {'type': 'loss', 'content': 0.22861431539058685, 'timestamp': '2025-10-01 04:21:06.698316', 'step': 3783, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:06.750771', 'step': 3783, 'epoch': 1} {'type': 'loss', 'content': 0.12369003891944885, 'timestamp': '2025-10-01 04:21:06.756541', 'step': 3784, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:06.809073', 'step': 3784, 'epoch': 1} {'type': 'loss', 'content': 0.19885039329528809, 'timestamp': '2025-10-01 04:21:06.812116', 'step': 3785, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:06.864432', 'step': 3785, 'epoch': 1} {'type': 'loss', 'content': 0.09736756980419159, 'timestamp': '2025-10-01 04:21:06.867235', 'step': 3786, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:06.919757', 'step': 3786, 'epoch': 1} {'type': 'loss', 'content': 0.1452331393957138, 'timestamp': '2025-10-01 04:21:06.922231', 'step': 3787, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:21:06.975158', 'step': 3787, 'epoch': 1} {'type': 'loss', 'content': 0.16660970449447632, 'timestamp': '2025-10-01 04:21:06.982391', 'step': 3788, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:21:07.034241', 'step': 3788, 'epoch': 1} {'type': 'loss', 'content': 0.20153702795505524, 'timestamp': '2025-10-01 04:21:07.036502', 'step': 3789, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:07.088974', 'step': 3789, 'epoch': 1} {'type': 'loss', 'content': 0.16984117031097412, 'timestamp': '2025-10-01 04:21:07.091394', 'step': 3790, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:07.146223', 'step': 3790, 'epoch': 1} {'type': 'loss', 'content': 0.1488741785287857, 'timestamp': '2025-10-01 04:21:07.148904', 'step': 3791, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:07.205234', 'step': 3791, 'epoch': 1} {'type': 'loss', 'content': 0.1669912338256836, 'timestamp': '2025-10-01 04:21:07.212966', 'step': 3792, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:07.269194', 'step': 3792, 'epoch': 1} {'type': 'loss', 'content': 0.255784273147583, 'timestamp': '2025-10-01 04:21:07.271454', 'step': 3793, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:07.324025', 'step': 3793, 'epoch': 1} {'type': 'loss', 'content': 0.19297274947166443, 'timestamp': '2025-10-01 04:21:07.326317', 'step': 3794, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:07.382640', 'step': 3794, 'epoch': 1} {'type': 'loss', 'content': 0.2203955203294754, 'timestamp': '2025-10-01 04:21:07.384930', 'step': 3795, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:07.438608', 'step': 3795, 'epoch': 1} {'type': 'loss', 'content': 0.18160344660282135, 'timestamp': '2025-10-01 04:21:07.447555', 'step': 3796, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:07.500680', 'step': 3796, 'epoch': 1} {'type': 'loss', 'content': 0.07841341942548752, 'timestamp': '2025-10-01 04:21:07.502897', 'step': 3797, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:07.555356', 'step': 3797, 'epoch': 1} {'type': 'loss', 'content': 0.1604643613100052, 'timestamp': '2025-10-01 04:21:07.557328', 'step': 3798, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:07.616166', 'step': 3798, 'epoch': 1} {'type': 'loss', 'content': 0.29246872663497925, 'timestamp': '2025-10-01 04:21:07.618535', 'step': 3799, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:07.671223', 'step': 3799, 'epoch': 1} {'type': 'loss', 'content': 0.05514567717909813, 'timestamp': '2025-10-01 04:21:07.677210', 'step': 3800, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:07.729728', 'step': 3800, 'epoch': 1} {'type': 'loss', 'content': 0.11767282336950302, 'timestamp': '2025-10-01 04:21:07.731736', 'step': 3801, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:07.787603', 'step': 3801, 'epoch': 1} {'type': 'loss', 'content': 0.15494488179683685, 'timestamp': '2025-10-01 04:21:07.789909', 'step': 3802, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:07.844691', 'step': 3802, 'epoch': 1} {'type': 'loss', 'content': 0.12275340408086777, 'timestamp': '2025-10-01 04:21:07.848476', 'step': 3803, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:07.903711', 'step': 3803, 'epoch': 1} {'type': 'loss', 'content': 0.18913201987743378, 'timestamp': '2025-10-01 04:21:07.909388', 'step': 3804, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:07.961472', 'step': 3804, 'epoch': 1} {'type': 'loss', 'content': 0.15701350569725037, 'timestamp': '2025-10-01 04:21:07.963723', 'step': 3805, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:08.024206', 'step': 3805, 'epoch': 1} {'type': 'loss', 'content': 0.11144548654556274, 'timestamp': '2025-10-01 04:21:08.026522', 'step': 3806, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:08.079550', 'step': 3806, 'epoch': 1} {'type': 'loss', 'content': 0.24468018114566803, 'timestamp': '2025-10-01 04:21:08.081831', 'step': 3807, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:08.134082', 'step': 3807, 'epoch': 1} {'type': 'loss', 'content': 0.12380355596542358, 'timestamp': '2025-10-01 04:21:08.140072', 'step': 3808, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:08.192872', 'step': 3808, 'epoch': 1} {'type': 'loss', 'content': 0.15536203980445862, 'timestamp': '2025-10-01 04:21:08.195014', 'step': 3809, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:08.247183', 'step': 3809, 'epoch': 1} {'type': 'loss', 'content': 0.1974014788866043, 'timestamp': '2025-10-01 04:21:08.249416', 'step': 3810, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:08.302686', 'step': 3810, 'epoch': 1} {'type': 'loss', 'content': 0.1628580540418625, 'timestamp': '2025-10-01 04:21:08.304801', 'step': 3811, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:08.360571', 'step': 3811, 'epoch': 1} {'type': 'loss', 'content': 0.18991650640964508, 'timestamp': '2025-10-01 04:21:08.366327', 'step': 3812, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:08.418318', 'step': 3812, 'epoch': 1} {'type': 'loss', 'content': 0.16269221901893616, 'timestamp': '2025-10-01 04:21:08.420664', 'step': 3813, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:08.473377', 'step': 3813, 'epoch': 1} {'type': 'loss', 'content': 0.14082403481006622, 'timestamp': '2025-10-01 04:21:08.475772', 'step': 3814, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:08.528652', 'step': 3814, 'epoch': 1} {'type': 'loss', 'content': 0.1235368400812149, 'timestamp': '2025-10-01 04:21:08.531973', 'step': 3815, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:08.584551', 'step': 3815, 'epoch': 1} {'type': 'loss', 'content': 0.20281295478343964, 'timestamp': '2025-10-01 04:21:08.590163', 'step': 3816, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:08.642188', 'step': 3816, 'epoch': 1} {'type': 'loss', 'content': 0.1873542070388794, 'timestamp': '2025-10-01 04:21:08.644245', 'step': 3817, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:08.696259', 'step': 3817, 'epoch': 1} {'type': 'loss', 'content': 0.10918585956096649, 'timestamp': '2025-10-01 04:21:08.698664', 'step': 3818, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:08.751601', 'step': 3818, 'epoch': 1} {'type': 'loss', 'content': 0.17572373151779175, 'timestamp': '2025-10-01 04:21:08.753649', 'step': 3819, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:08.806444', 'step': 3819, 'epoch': 1} {'type': 'loss', 'content': 0.14352348446846008, 'timestamp': '2025-10-01 04:21:08.812342', 'step': 3820, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:08.864684', 'step': 3820, 'epoch': 1} {'type': 'loss', 'content': 0.12173763662576675, 'timestamp': '2025-10-01 04:21:08.866889', 'step': 3821, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:08.919915', 'step': 3821, 'epoch': 1} {'type': 'loss', 'content': 0.1845068335533142, 'timestamp': '2025-10-01 04:21:08.921941', 'step': 3822, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:08.974241', 'step': 3822, 'epoch': 1} {'type': 'loss', 'content': 0.10331045091152191, 'timestamp': '2025-10-01 04:21:08.976635', 'step': 3823, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:09.028796', 'step': 3823, 'epoch': 1} {'type': 'loss', 'content': 0.21172404289245605, 'timestamp': '2025-10-01 04:21:09.034734', 'step': 3824, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:09.088193', 'step': 3824, 'epoch': 1} {'type': 'loss', 'content': 0.12210867553949356, 'timestamp': '2025-10-01 04:21:09.090441', 'step': 3825, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:21:09.143761', 'step': 3825, 'epoch': 1} {'type': 'loss', 'content': 0.20330080389976501, 'timestamp': '2025-10-01 04:21:09.151241', 'step': 3826, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:09.205342', 'step': 3826, 'epoch': 1} {'type': 'loss', 'content': 0.1507522165775299, 'timestamp': '2025-10-01 04:21:09.207706', 'step': 3827, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:09.264525', 'step': 3827, 'epoch': 1} {'type': 'loss', 'content': 0.2024773508310318, 'timestamp': '2025-10-01 04:21:09.270475', 'step': 3828, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:09.322885', 'step': 3828, 'epoch': 1} {'type': 'loss', 'content': 0.1637982875108719, 'timestamp': '2025-10-01 04:21:09.325307', 'step': 3829, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:09.377908', 'step': 3829, 'epoch': 1} {'type': 'loss', 'content': 0.12680251896381378, 'timestamp': '2025-10-01 04:21:09.380057', 'step': 3830, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:09.438311', 'step': 3830, 'epoch': 1} {'type': 'loss', 'content': 0.15248562395572662, 'timestamp': '2025-10-01 04:21:09.440481', 'step': 3831, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:09.492706', 'step': 3831, 'epoch': 1} {'type': 'loss', 'content': 0.211716890335083, 'timestamp': '2025-10-01 04:21:09.498386', 'step': 3832, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:09.551380', 'step': 3832, 'epoch': 1} {'type': 'loss', 'content': 0.24689044058322906, 'timestamp': '2025-10-01 04:21:09.553372', 'step': 3833, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:09.605935', 'step': 3833, 'epoch': 1} {'type': 'loss', 'content': 0.1999245285987854, 'timestamp': '2025-10-01 04:21:09.608409', 'step': 3834, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:09.662160', 'step': 3834, 'epoch': 1} {'type': 'loss', 'content': 0.243472158908844, 'timestamp': '2025-10-01 04:21:09.664311', 'step': 3835, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:09.717598', 'step': 3835, 'epoch': 1} {'type': 'loss', 'content': 0.15598778426647186, 'timestamp': '2025-10-01 04:21:09.723315', 'step': 3836, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:09.775784', 'step': 3836, 'epoch': 1} {'type': 'loss', 'content': 0.198815256357193, 'timestamp': '2025-10-01 04:21:09.778091', 'step': 3837, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:09.832038', 'step': 3837, 'epoch': 1} {'type': 'loss', 'content': 0.1338084191083908, 'timestamp': '2025-10-01 04:21:09.834264', 'step': 3838, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:09.886990', 'step': 3838, 'epoch': 1} {'type': 'loss', 'content': 0.17396071553230286, 'timestamp': '2025-10-01 04:21:09.889003', 'step': 3839, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:09.941553', 'step': 3839, 'epoch': 1} {'type': 'loss', 'content': 0.16460467875003815, 'timestamp': '2025-10-01 04:21:09.947421', 'step': 3840, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:09.999545', 'step': 3840, 'epoch': 1} {'type': 'loss', 'content': 0.17588284611701965, 'timestamp': '2025-10-01 04:21:10.001723', 'step': 3841, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:10.053951', 'step': 3841, 'epoch': 1} {'type': 'loss', 'content': 0.23745019733905792, 'timestamp': '2025-10-01 04:21:10.056667', 'step': 3842, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:10.109928', 'step': 3842, 'epoch': 1} {'type': 'loss', 'content': 0.19348375499248505, 'timestamp': '2025-10-01 04:21:10.112137', 'step': 3843, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:10.164295', 'step': 3843, 'epoch': 1} {'type': 'loss', 'content': 0.20988474786281586, 'timestamp': '2025-10-01 04:21:10.178641', 'step': 3844, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:10.231400', 'step': 3844, 'epoch': 1} {'type': 'loss', 'content': 0.2705570161342621, 'timestamp': '2025-10-01 04:21:10.234360', 'step': 3845, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:10.289598', 'step': 3845, 'epoch': 1} {'type': 'loss', 'content': 0.1868039071559906, 'timestamp': '2025-10-01 04:21:10.292181', 'step': 3846, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:10.345107', 'step': 3846, 'epoch': 1} {'type': 'loss', 'content': 0.28184032440185547, 'timestamp': '2025-10-01 04:21:10.347657', 'step': 3847, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:10.402909', 'step': 3847, 'epoch': 1} {'type': 'loss', 'content': 0.2254287451505661, 'timestamp': '2025-10-01 04:21:10.408544', 'step': 3848, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:10.460504', 'step': 3848, 'epoch': 1} {'type': 'loss', 'content': 0.2159462571144104, 'timestamp': '2025-10-01 04:21:10.462599', 'step': 3849, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:10.514953', 'step': 3849, 'epoch': 1} {'type': 'loss', 'content': 0.16223372519016266, 'timestamp': '2025-10-01 04:21:10.517505', 'step': 3850, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:10.570941', 'step': 3850, 'epoch': 1} {'type': 'loss', 'content': 0.19486796855926514, 'timestamp': '2025-10-01 04:21:10.573159', 'step': 3851, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:10.625986', 'step': 3851, 'epoch': 1} {'type': 'loss', 'content': 0.21039928495883942, 'timestamp': '2025-10-01 04:21:10.631891', 'step': 3852, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:10.686284', 'step': 3852, 'epoch': 1} {'type': 'loss', 'content': 0.1944684088230133, 'timestamp': '2025-10-01 04:21:10.688444', 'step': 3853, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:10.740930', 'step': 3853, 'epoch': 1} {'type': 'loss', 'content': 0.15996667742729187, 'timestamp': '2025-10-01 04:21:10.743624', 'step': 3854, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:10.795586', 'step': 3854, 'epoch': 1} {'type': 'loss', 'content': 0.1757199615240097, 'timestamp': '2025-10-01 04:21:10.803160', 'step': 3855, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:10.855477', 'step': 3855, 'epoch': 1} {'type': 'loss', 'content': 0.17536422610282898, 'timestamp': '2025-10-01 04:21:10.861134', 'step': 3856, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:10.920107', 'step': 3856, 'epoch': 1} {'type': 'loss', 'content': 0.16844190657138824, 'timestamp': '2025-10-01 04:21:10.922344', 'step': 3857, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:10.974863', 'step': 3857, 'epoch': 1} {'type': 'loss', 'content': 0.18197840452194214, 'timestamp': '2025-10-01 04:21:10.977005', 'step': 3858, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:11.029503', 'step': 3858, 'epoch': 1} {'type': 'loss', 'content': 0.1243605986237526, 'timestamp': '2025-10-01 04:21:11.031543', 'step': 3859, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:11.084933', 'step': 3859, 'epoch': 1} {'type': 'loss', 'content': 0.09863444417715073, 'timestamp': '2025-10-01 04:21:11.095993', 'step': 3860, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:11.150138', 'step': 3860, 'epoch': 1} {'type': 'loss', 'content': 0.2670561373233795, 'timestamp': '2025-10-01 04:21:11.152380', 'step': 3861, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:11.204939', 'step': 3861, 'epoch': 1} {'type': 'loss', 'content': 0.15803886950016022, 'timestamp': '2025-10-01 04:21:11.207145', 'step': 3862, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:11.259768', 'step': 3862, 'epoch': 1} {'type': 'loss', 'content': 0.16729626059532166, 'timestamp': '2025-10-01 04:21:11.261895', 'step': 3863, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:21:11.315224', 'step': 3863, 'epoch': 1} {'type': 'loss', 'content': 0.10031541436910629, 'timestamp': '2025-10-01 04:21:11.320896', 'step': 3864, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:11.372962', 'step': 3864, 'epoch': 1} {'type': 'loss', 'content': 0.19248022139072418, 'timestamp': '2025-10-01 04:21:11.375219', 'step': 3865, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:11.427291', 'step': 3865, 'epoch': 1} {'type': 'loss', 'content': 0.2532694935798645, 'timestamp': '2025-10-01 04:21:11.430277', 'step': 3866, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:11.483308', 'step': 3866, 'epoch': 1} {'type': 'loss', 'content': 0.17540788650512695, 'timestamp': '2025-10-01 04:21:11.487260', 'step': 3867, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:11.544160', 'step': 3867, 'epoch': 1} {'type': 'loss', 'content': 0.11058373749256134, 'timestamp': '2025-10-01 04:21:11.549943', 'step': 3868, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:11.604407', 'step': 3868, 'epoch': 1} {'type': 'loss', 'content': 0.17569464445114136, 'timestamp': '2025-10-01 04:21:11.607775', 'step': 3869, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:11.660210', 'step': 3869, 'epoch': 1} {'type': 'loss', 'content': 0.18614934384822845, 'timestamp': '2025-10-01 04:21:11.662255', 'step': 3870, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:11.715367', 'step': 3870, 'epoch': 1} {'type': 'loss', 'content': 0.11953143775463104, 'timestamp': '2025-10-01 04:21:11.718779', 'step': 3871, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:21:11.774326', 'step': 3871, 'epoch': 1} {'type': 'loss', 'content': 0.2699061632156372, 'timestamp': '2025-10-01 04:21:11.784625', 'step': 3872, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:11.840196', 'step': 3872, 'epoch': 1} {'type': 'loss', 'content': 0.2433842420578003, 'timestamp': '2025-10-01 04:21:11.843692', 'step': 3873, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:11.901616', 'step': 3873, 'epoch': 1} {'type': 'loss', 'content': 0.13827525079250336, 'timestamp': '2025-10-01 04:21:11.903763', 'step': 3874, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:11.956374', 'step': 3874, 'epoch': 1} {'type': 'loss', 'content': 0.10295934975147247, 'timestamp': '2025-10-01 04:21:11.958440', 'step': 3875, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:12.012556', 'step': 3875, 'epoch': 1} {'type': 'loss', 'content': 0.13781464099884033, 'timestamp': '2025-10-01 04:21:12.018320', 'step': 3876, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:12.071342', 'step': 3876, 'epoch': 1} {'type': 'loss', 'content': 0.12861385941505432, 'timestamp': '2025-10-01 04:21:12.085747', 'step': 3877, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:12.147994', 'step': 3877, 'epoch': 1} {'type': 'loss', 'content': 0.10342645645141602, 'timestamp': '2025-10-01 04:21:12.150505', 'step': 3878, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:12.204021', 'step': 3878, 'epoch': 1} {'type': 'loss', 'content': 0.20941229164600372, 'timestamp': '2025-10-01 04:21:12.210691', 'step': 3879, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:12.271312', 'step': 3879, 'epoch': 1} {'type': 'loss', 'content': 0.1138954907655716, 'timestamp': '2025-10-01 04:21:12.276987', 'step': 3880, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:12.335953', 'step': 3880, 'epoch': 1} {'type': 'loss', 'content': 0.1985904723405838, 'timestamp': '2025-10-01 04:21:12.347901', 'step': 3881, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:12.403716', 'step': 3881, 'epoch': 1} {'type': 'loss', 'content': 0.18127088248729706, 'timestamp': '2025-10-01 04:21:12.410138', 'step': 3882, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:12.474598', 'step': 3882, 'epoch': 1} {'type': 'loss', 'content': 0.12930800020694733, 'timestamp': '2025-10-01 04:21:12.476824', 'step': 3883, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:12.528888', 'step': 3883, 'epoch': 1} {'type': 'loss', 'content': 0.18602988123893738, 'timestamp': '2025-10-01 04:21:12.534548', 'step': 3884, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:12.597705', 'step': 3884, 'epoch': 1} {'type': 'loss', 'content': 0.22403597831726074, 'timestamp': '2025-10-01 04:21:12.599575', 'step': 3885, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:12.652311', 'step': 3885, 'epoch': 1} {'type': 'loss', 'content': 0.14963412284851074, 'timestamp': '2025-10-01 04:21:12.654694', 'step': 3886, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:12.713460', 'step': 3886, 'epoch': 1} {'type': 'loss', 'content': 0.1408631056547165, 'timestamp': '2025-10-01 04:21:12.715677', 'step': 3887, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:12.767930', 'step': 3887, 'epoch': 1} {'type': 'loss', 'content': 0.15138483047485352, 'timestamp': '2025-10-01 04:21:12.773464', 'step': 3888, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:12.825635', 'step': 3888, 'epoch': 1} {'type': 'loss', 'content': 0.19210278987884521, 'timestamp': '2025-10-01 04:21:12.827896', 'step': 3889, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:12.880786', 'step': 3889, 'epoch': 1} {'type': 'loss', 'content': 0.2013344168663025, 'timestamp': '2025-10-01 04:21:12.883054', 'step': 3890, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:12.936774', 'step': 3890, 'epoch': 1} {'type': 'loss', 'content': 0.16693401336669922, 'timestamp': '2025-10-01 04:21:12.939357', 'step': 3891, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:21:12.992166', 'step': 3891, 'epoch': 1} {'type': 'loss', 'content': 0.19722308218479156, 'timestamp': '2025-10-01 04:21:12.997840', 'step': 3892, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:13.049658', 'step': 3892, 'epoch': 1} {'type': 'loss', 'content': 0.1521126925945282, 'timestamp': '2025-10-01 04:21:13.051988', 'step': 3893, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:13.104380', 'step': 3893, 'epoch': 1} {'type': 'loss', 'content': 0.1707478165626526, 'timestamp': '2025-10-01 04:21:13.108413', 'step': 3894, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:13.161012', 'step': 3894, 'epoch': 1} {'type': 'loss', 'content': 0.15408112108707428, 'timestamp': '2025-10-01 04:21:13.163178', 'step': 3895, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:13.216413', 'step': 3895, 'epoch': 1} {'type': 'loss', 'content': 0.14491069316864014, 'timestamp': '2025-10-01 04:21:13.222138', 'step': 3896, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:13.275011', 'step': 3896, 'epoch': 1} {'type': 'loss', 'content': 0.13474519550800323, 'timestamp': '2025-10-01 04:21:13.277022', 'step': 3897, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:13.335029', 'step': 3897, 'epoch': 1} {'type': 'loss', 'content': 0.18123477697372437, 'timestamp': '2025-10-01 04:21:13.337402', 'step': 3898, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:13.390414', 'step': 3898, 'epoch': 1} {'type': 'loss', 'content': 0.24359729886054993, 'timestamp': '2025-10-01 04:21:13.392725', 'step': 3899, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:21:13.445341', 'step': 3899, 'epoch': 1} {'type': 'loss', 'content': 0.18081898987293243, 'timestamp': '2025-10-01 04:21:13.452395', 'step': 3900, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:13.504577', 'step': 3900, 'epoch': 1} {'type': 'loss', 'content': 0.19322890043258667, 'timestamp': '2025-10-01 04:21:13.507048', 'step': 3901, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:13.559824', 'step': 3901, 'epoch': 1} {'type': 'loss', 'content': 0.11070789396762848, 'timestamp': '2025-10-01 04:21:13.562063', 'step': 3902, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:13.614174', 'step': 3902, 'epoch': 1} {'type': 'loss', 'content': 0.15200833976268768, 'timestamp': '2025-10-01 04:21:13.616406', 'step': 3903, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:13.668842', 'step': 3903, 'epoch': 1} {'type': 'loss', 'content': 0.14971432089805603, 'timestamp': '2025-10-01 04:21:13.674606', 'step': 3904, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:13.726865', 'step': 3904, 'epoch': 1} {'type': 'loss', 'content': 0.17380210757255554, 'timestamp': '2025-10-01 04:21:13.730156', 'step': 3905, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:13.783372', 'step': 3905, 'epoch': 1} {'type': 'loss', 'content': 0.13798682391643524, 'timestamp': '2025-10-01 04:21:13.785659', 'step': 3906, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:13.838700', 'step': 3906, 'epoch': 1} {'type': 'loss', 'content': 0.1793123036623001, 'timestamp': '2025-10-01 04:21:13.840999', 'step': 3907, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:13.893423', 'step': 3907, 'epoch': 1} {'type': 'loss', 'content': 0.12748552858829498, 'timestamp': '2025-10-01 04:21:13.899525', 'step': 3908, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:13.951607', 'step': 3908, 'epoch': 1} {'type': 'loss', 'content': 0.19030334055423737, 'timestamp': '2025-10-01 04:21:13.953941', 'step': 3909, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:14.007112', 'step': 3909, 'epoch': 1} {'type': 'loss', 'content': 0.18283019959926605, 'timestamp': '2025-10-01 04:21:14.009264', 'step': 3910, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:14.062377', 'step': 3910, 'epoch': 1} {'type': 'loss', 'content': 0.16493898630142212, 'timestamp': '2025-10-01 04:21:14.064704', 'step': 3911, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:14.117941', 'step': 3911, 'epoch': 1} {'type': 'loss', 'content': 0.12767653167247772, 'timestamp': '2025-10-01 04:21:14.123761', 'step': 3912, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:14.181017', 'step': 3912, 'epoch': 1} {'type': 'loss', 'content': 0.1468687504529953, 'timestamp': '2025-10-01 04:21:14.183449', 'step': 3913, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:14.236410', 'step': 3913, 'epoch': 1} {'type': 'loss', 'content': 0.1381637156009674, 'timestamp': '2025-10-01 04:21:14.239140', 'step': 3914, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:14.291684', 'step': 3914, 'epoch': 1} {'type': 'loss', 'content': 0.18905943632125854, 'timestamp': '2025-10-01 04:21:14.293999', 'step': 3915, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:14.346904', 'step': 3915, 'epoch': 1} {'type': 'loss', 'content': 0.10481041669845581, 'timestamp': '2025-10-01 04:21:14.352868', 'step': 3916, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:14.405277', 'step': 3916, 'epoch': 1} {'type': 'loss', 'content': 0.2189904898405075, 'timestamp': '2025-10-01 04:21:14.407468', 'step': 3917, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:14.460462', 'step': 3917, 'epoch': 1} {'type': 'loss', 'content': 0.271513432264328, 'timestamp': '2025-10-01 04:21:14.462563', 'step': 3918, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:14.516229', 'step': 3918, 'epoch': 1} {'type': 'loss', 'content': 0.24655678868293762, 'timestamp': '2025-10-01 04:21:14.518462', 'step': 3919, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:14.571808', 'step': 3919, 'epoch': 1} {'type': 'loss', 'content': 0.21916484832763672, 'timestamp': '2025-10-01 04:21:14.577950', 'step': 3920, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:14.631053', 'step': 3920, 'epoch': 1} {'type': 'loss', 'content': 0.1355070173740387, 'timestamp': '2025-10-01 04:21:14.633417', 'step': 3921, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:14.696773', 'step': 3921, 'epoch': 1} {'type': 'loss', 'content': 0.18971359729766846, 'timestamp': '2025-10-01 04:21:14.699171', 'step': 3922, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:21:14.753489', 'step': 3922, 'epoch': 1} {'type': 'loss', 'content': 0.14770019054412842, 'timestamp': '2025-10-01 04:21:14.756090', 'step': 3923, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:14.809432', 'step': 3923, 'epoch': 1} {'type': 'loss', 'content': 0.14467105269432068, 'timestamp': '2025-10-01 04:21:14.816116', 'step': 3924, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:14.869469', 'step': 3924, 'epoch': 1} {'type': 'loss', 'content': 0.11673034727573395, 'timestamp': '2025-10-01 04:21:14.872125', 'step': 3925, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:14.925883', 'step': 3925, 'epoch': 1} {'type': 'loss', 'content': 0.21172285079956055, 'timestamp': '2025-10-01 04:21:14.927887', 'step': 3926, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:14.987339', 'step': 3926, 'epoch': 1} {'type': 'loss', 'content': 0.15184767544269562, 'timestamp': '2025-10-01 04:21:14.989940', 'step': 3927, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:15.044258', 'step': 3927, 'epoch': 1} {'type': 'loss', 'content': 0.1815219521522522, 'timestamp': '2025-10-01 04:21:15.050551', 'step': 3928, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:15.103122', 'step': 3928, 'epoch': 1} {'type': 'loss', 'content': 0.12659375369548798, 'timestamp': '2025-10-01 04:21:15.105669', 'step': 3929, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:15.159620', 'step': 3929, 'epoch': 1} {'type': 'loss', 'content': 0.1388045698404312, 'timestamp': '2025-10-01 04:21:15.162586', 'step': 3930, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:15.216424', 'step': 3930, 'epoch': 1} {'type': 'loss', 'content': 0.2524076998233795, 'timestamp': '2025-10-01 04:21:15.218924', 'step': 3931, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:15.273094', 'step': 3931, 'epoch': 1} {'type': 'loss', 'content': 0.1598900854587555, 'timestamp': '2025-10-01 04:21:15.279573', 'step': 3932, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:15.332093', 'step': 3932, 'epoch': 1} {'type': 'loss', 'content': 0.1696644127368927, 'timestamp': '2025-10-01 04:21:15.334336', 'step': 3933, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:15.388118', 'step': 3933, 'epoch': 1} {'type': 'loss', 'content': 0.14268790185451508, 'timestamp': '2025-10-01 04:21:15.390936', 'step': 3934, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:15.444644', 'step': 3934, 'epoch': 1} {'type': 'loss', 'content': 0.20571020245552063, 'timestamp': '2025-10-01 04:21:15.446909', 'step': 3935, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:15.500879', 'step': 3935, 'epoch': 1} {'type': 'loss', 'content': 0.10388270765542984, 'timestamp': '2025-10-01 04:21:15.506964', 'step': 3936, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:15.559958', 'step': 3936, 'epoch': 1} {'type': 'loss', 'content': 0.1638478934764862, 'timestamp': '2025-10-01 04:21:15.562433', 'step': 3937, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:15.615541', 'step': 3937, 'epoch': 1} {'type': 'loss', 'content': 0.15166279673576355, 'timestamp': '2025-10-01 04:21:15.618126', 'step': 3938, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:15.671483', 'step': 3938, 'epoch': 1} {'type': 'loss', 'content': 0.18509015440940857, 'timestamp': '2025-10-01 04:21:15.673974', 'step': 3939, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:21:15.728070', 'step': 3939, 'epoch': 1} {'type': 'loss', 'content': 0.198492169380188, 'timestamp': '2025-10-01 04:21:15.734287', 'step': 3940, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:15.786554', 'step': 3940, 'epoch': 1} {'type': 'loss', 'content': 0.20940685272216797, 'timestamp': '2025-10-01 04:21:15.788797', 'step': 3941, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:15.842524', 'step': 3941, 'epoch': 1} {'type': 'loss', 'content': 0.21516983211040497, 'timestamp': '2025-10-01 04:21:15.845195', 'step': 3942, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:15.898722', 'step': 3942, 'epoch': 1} {'type': 'loss', 'content': 0.20614802837371826, 'timestamp': '2025-10-01 04:21:15.902011', 'step': 3943, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:15.958451', 'step': 3943, 'epoch': 1} {'type': 'loss', 'content': 0.1938399225473404, 'timestamp': '2025-10-01 04:21:15.964886', 'step': 3944, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:16.019477', 'step': 3944, 'epoch': 1} {'type': 'loss', 'content': 0.20277127623558044, 'timestamp': '2025-10-01 04:21:16.022266', 'step': 3945, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:16.075646', 'step': 3945, 'epoch': 1} {'type': 'loss', 'content': 0.2329740673303604, 'timestamp': '2025-10-01 04:21:16.078379', 'step': 3946, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:21:16.133210', 'step': 3946, 'epoch': 1} {'type': 'loss', 'content': 0.17284515500068665, 'timestamp': '2025-10-01 04:21:16.136611', 'step': 3947, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:16.190688', 'step': 3947, 'epoch': 1} {'type': 'loss', 'content': 0.21103112399578094, 'timestamp': '2025-10-01 04:21:16.197898', 'step': 3948, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:21:16.250662', 'step': 3948, 'epoch': 1} {'type': 'loss', 'content': 0.15049605071544647, 'timestamp': '2025-10-01 04:21:16.252909', 'step': 3949, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:16.305309', 'step': 3949, 'epoch': 1} {'type': 'loss', 'content': 0.1682310700416565, 'timestamp': '2025-10-01 04:21:16.307480', 'step': 3950, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:16.366213', 'step': 3950, 'epoch': 1} {'type': 'loss', 'content': 0.2996060848236084, 'timestamp': '2025-10-01 04:21:16.368549', 'step': 3951, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:16.422016', 'step': 3951, 'epoch': 1} {'type': 'loss', 'content': 0.16546203196048737, 'timestamp': '2025-10-01 04:21:16.428137', 'step': 3952, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:16.481582', 'step': 3952, 'epoch': 1} {'type': 'loss', 'content': 0.23063413798809052, 'timestamp': '2025-10-01 04:21:16.485997', 'step': 3953, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:16.539322', 'step': 3953, 'epoch': 1} {'type': 'loss', 'content': 0.15447641909122467, 'timestamp': '2025-10-01 04:21:16.543375', 'step': 3954, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:16.597251', 'step': 3954, 'epoch': 1} {'type': 'loss', 'content': 0.12426035851240158, 'timestamp': '2025-10-01 04:21:16.600573', 'step': 3955, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:16.660613', 'step': 3955, 'epoch': 1} {'type': 'loss', 'content': 0.0959630161523819, 'timestamp': '2025-10-01 04:21:16.666907', 'step': 3956, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:16.719526', 'step': 3956, 'epoch': 1} {'type': 'loss', 'content': 0.15273183584213257, 'timestamp': '2025-10-01 04:21:16.724174', 'step': 3957, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:16.778456', 'step': 3957, 'epoch': 1} {'type': 'loss', 'content': 0.20095038414001465, 'timestamp': '2025-10-01 04:21:16.784441', 'step': 3958, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:16.838021', 'step': 3958, 'epoch': 1} {'type': 'loss', 'content': 0.2066577672958374, 'timestamp': '2025-10-01 04:21:16.840416', 'step': 3959, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:16.893022', 'step': 3959, 'epoch': 1} {'type': 'loss', 'content': 0.11366655677556992, 'timestamp': '2025-10-01 04:21:16.898956', 'step': 3960, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:16.951059', 'step': 3960, 'epoch': 1} {'type': 'loss', 'content': 0.22819960117340088, 'timestamp': '2025-10-01 04:21:16.960880', 'step': 3961, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:17.013597', 'step': 3961, 'epoch': 1} {'type': 'loss', 'content': 0.16604721546173096, 'timestamp': '2025-10-01 04:21:17.016072', 'step': 3962, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:17.070232', 'step': 3962, 'epoch': 1} {'type': 'loss', 'content': 0.17779937386512756, 'timestamp': '2025-10-01 04:21:17.073422', 'step': 3963, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:17.135495', 'step': 3963, 'epoch': 1} {'type': 'loss', 'content': 0.14770297706127167, 'timestamp': '2025-10-01 04:21:17.141593', 'step': 3964, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:17.193984', 'step': 3964, 'epoch': 1} {'type': 'loss', 'content': 0.18317684531211853, 'timestamp': '2025-10-01 04:21:17.195993', 'step': 3965, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:17.248670', 'step': 3965, 'epoch': 1} {'type': 'loss', 'content': 0.22521187365055084, 'timestamp': '2025-10-01 04:21:17.250888', 'step': 3966, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:17.305382', 'step': 3966, 'epoch': 1} {'type': 'loss', 'content': 0.10181644558906555, 'timestamp': '2025-10-01 04:21:17.309596', 'step': 3967, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:17.361665', 'step': 3967, 'epoch': 1} {'type': 'loss', 'content': 0.17134033143520355, 'timestamp': '2025-10-01 04:21:17.367522', 'step': 3968, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:17.419823', 'step': 3968, 'epoch': 1} {'type': 'loss', 'content': 0.2812274098396301, 'timestamp': '2025-10-01 04:21:17.421969', 'step': 3969, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:17.474564', 'step': 3969, 'epoch': 1} {'type': 'loss', 'content': 0.18612970411777496, 'timestamp': '2025-10-01 04:21:17.476790', 'step': 3970, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:21:17.530522', 'step': 3970, 'epoch': 1} {'type': 'loss', 'content': 0.13279151916503906, 'timestamp': '2025-10-01 04:21:17.532665', 'step': 3971, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:17.585698', 'step': 3971, 'epoch': 1} {'type': 'loss', 'content': 0.18699294328689575, 'timestamp': '2025-10-01 04:21:17.591650', 'step': 3972, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:17.644152', 'step': 3972, 'epoch': 1} {'type': 'loss', 'content': 0.22463545203208923, 'timestamp': '2025-10-01 04:21:17.646799', 'step': 3973, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:17.699058', 'step': 3973, 'epoch': 1} {'type': 'loss', 'content': 0.18326745927333832, 'timestamp': '2025-10-01 04:21:17.701507', 'step': 3974, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:17.754296', 'step': 3974, 'epoch': 1} {'type': 'loss', 'content': 0.22809825837612152, 'timestamp': '2025-10-01 04:21:17.756548', 'step': 3975, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:17.809896', 'step': 3975, 'epoch': 1} {'type': 'loss', 'content': 0.16294646263122559, 'timestamp': '2025-10-01 04:21:17.815688', 'step': 3976, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:17.868179', 'step': 3976, 'epoch': 1} {'type': 'loss', 'content': 0.16390514373779297, 'timestamp': '2025-10-01 04:21:17.870708', 'step': 3977, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:17.923822', 'step': 3977, 'epoch': 1} {'type': 'loss', 'content': 0.164727583527565, 'timestamp': '2025-10-01 04:21:17.925856', 'step': 3978, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:17.978834', 'step': 3978, 'epoch': 1} {'type': 'loss', 'content': 0.1617886871099472, 'timestamp': '2025-10-01 04:21:17.981672', 'step': 3979, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:18.033893', 'step': 3979, 'epoch': 1} {'type': 'loss', 'content': 0.19686873257160187, 'timestamp': '2025-10-01 04:21:18.039473', 'step': 3980, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:18.092324', 'step': 3980, 'epoch': 1} {'type': 'loss', 'content': 0.10039511322975159, 'timestamp': '2025-10-01 04:21:18.094968', 'step': 3981, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:21:18.147994', 'step': 3981, 'epoch': 1} {'type': 'loss', 'content': 0.1548936814069748, 'timestamp': '2025-10-01 04:21:18.150644', 'step': 3982, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:18.203768', 'step': 3982, 'epoch': 1} {'type': 'loss', 'content': 0.15214088559150696, 'timestamp': '2025-10-01 04:21:18.220117', 'step': 3983, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:18.274018', 'step': 3983, 'epoch': 1} {'type': 'loss', 'content': 0.09661350399255753, 'timestamp': '2025-10-01 04:21:18.280194', 'step': 3984, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:18.334716', 'step': 3984, 'epoch': 1} {'type': 'loss', 'content': 0.15028665959835052, 'timestamp': '2025-10-01 04:21:18.337253', 'step': 3985, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:18.391011', 'step': 3985, 'epoch': 1} {'type': 'loss', 'content': 0.15428024530410767, 'timestamp': '2025-10-01 04:21:18.392986', 'step': 3986, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:18.445645', 'step': 3986, 'epoch': 1} {'type': 'loss', 'content': 0.1338777393102646, 'timestamp': '2025-10-01 04:21:18.448059', 'step': 3987, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:18.501947', 'step': 3987, 'epoch': 1} {'type': 'loss', 'content': 0.09496114403009415, 'timestamp': '2025-10-01 04:21:18.516497', 'step': 3988, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:18.569639', 'step': 3988, 'epoch': 1} {'type': 'loss', 'content': 0.16609624028205872, 'timestamp': '2025-10-01 04:21:18.571819', 'step': 3989, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:21:18.625331', 'step': 3989, 'epoch': 1} {'type': 'loss', 'content': 0.13942252099514008, 'timestamp': '2025-10-01 04:21:18.628094', 'step': 3990, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:18.681281', 'step': 3990, 'epoch': 1} {'type': 'loss', 'content': 0.19709253311157227, 'timestamp': '2025-10-01 04:21:18.683594', 'step': 3991, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:18.745263', 'step': 3991, 'epoch': 1} {'type': 'loss', 'content': 0.1604738086462021, 'timestamp': '2025-10-01 04:21:18.759782', 'step': 3992, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:18.812666', 'step': 3992, 'epoch': 1} {'type': 'loss', 'content': 0.12216465175151825, 'timestamp': '2025-10-01 04:21:18.814918', 'step': 3993, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:18.867990', 'step': 3993, 'epoch': 1} {'type': 'loss', 'content': 0.15233542025089264, 'timestamp': '2025-10-01 04:21:18.870569', 'step': 3994, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:18.924418', 'step': 3994, 'epoch': 1} {'type': 'loss', 'content': 0.15151306986808777, 'timestamp': '2025-10-01 04:21:18.926610', 'step': 3995, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:18.979291', 'step': 3995, 'epoch': 1} {'type': 'loss', 'content': 0.20056858658790588, 'timestamp': '2025-10-01 04:21:18.985096', 'step': 3996, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:19.051751', 'step': 3996, 'epoch': 1} {'type': 'loss', 'content': 0.1652487814426422, 'timestamp': '2025-10-01 04:21:19.053986', 'step': 3997, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:19.106347', 'step': 3997, 'epoch': 1} {'type': 'loss', 'content': 0.2489931434392929, 'timestamp': '2025-10-01 04:21:19.108639', 'step': 3998, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:19.161642', 'step': 3998, 'epoch': 1} {'type': 'loss', 'content': 0.14306920766830444, 'timestamp': '2025-10-01 04:21:19.163897', 'step': 3999, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:21:19.216546', 'step': 3999, 'epoch': 1} {'type': 'loss', 'content': 0.23209816217422485, 'timestamp': '2025-10-01 04:21:19.222829', 'step': 4000, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 4000', 'timestamp': '2025-10-01 04:21:19.598048', 'step': 4000, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:19.652362', 'step': 4000, 'epoch': 1} {'type': 'loss', 'content': 0.16422301530838013, 'timestamp': '2025-10-01 04:21:19.654910', 'step': 4001, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:19.708347', 'step': 4001, 'epoch': 1} {'type': 'loss', 'content': 0.13665980100631714, 'timestamp': '2025-10-01 04:21:19.710994', 'step': 4002, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:19.765592', 'step': 4002, 'epoch': 1} {'type': 'loss', 'content': 0.13515323400497437, 'timestamp': '2025-10-01 04:21:19.770913', 'step': 4003, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:19.824026', 'step': 4003, 'epoch': 1} {'type': 'loss', 'content': 0.20731306076049805, 'timestamp': '2025-10-01 04:21:19.830068', 'step': 4004, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:19.882204', 'step': 4004, 'epoch': 1} {'type': 'loss', 'content': 0.20181360840797424, 'timestamp': '2025-10-01 04:21:19.884473', 'step': 4005, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:19.937466', 'step': 4005, 'epoch': 1} {'type': 'loss', 'content': 0.2112330049276352, 'timestamp': '2025-10-01 04:21:19.940258', 'step': 4006, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:19.993760', 'step': 4006, 'epoch': 1} {'type': 'loss', 'content': 0.16607332229614258, 'timestamp': '2025-10-01 04:21:19.996037', 'step': 4007, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:20.050135', 'step': 4007, 'epoch': 1} {'type': 'loss', 'content': 0.14108285307884216, 'timestamp': '2025-10-01 04:21:20.056376', 'step': 4008, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:20.109145', 'step': 4008, 'epoch': 1} {'type': 'loss', 'content': 0.22597476840019226, 'timestamp': '2025-10-01 04:21:20.111366', 'step': 4009, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:20.170619', 'step': 4009, 'epoch': 1} {'type': 'loss', 'content': 0.12514235079288483, 'timestamp': '2025-10-01 04:21:20.173938', 'step': 4010, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:21:20.229188', 'step': 4010, 'epoch': 1} {'type': 'loss', 'content': 0.2235184758901596, 'timestamp': '2025-10-01 04:21:20.238864', 'step': 4011, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:20.292302', 'step': 4011, 'epoch': 1} {'type': 'loss', 'content': 0.13216865062713623, 'timestamp': '2025-10-01 04:21:20.304490', 'step': 4012, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:20.357665', 'step': 4012, 'epoch': 1} {'type': 'loss', 'content': 0.2740074098110199, 'timestamp': '2025-10-01 04:21:20.359873', 'step': 4013, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:20.412777', 'step': 4013, 'epoch': 1} {'type': 'loss', 'content': 0.1419563889503479, 'timestamp': '2025-10-01 04:21:20.415035', 'step': 4014, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:20.468241', 'step': 4014, 'epoch': 1} {'type': 'loss', 'content': 0.16886962950229645, 'timestamp': '2025-10-01 04:21:20.470843', 'step': 4015, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:20.524055', 'step': 4015, 'epoch': 1} {'type': 'loss', 'content': 0.20180006325244904, 'timestamp': '2025-10-01 04:21:20.530047', 'step': 4016, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:20.582858', 'step': 4016, 'epoch': 1} {'type': 'loss', 'content': 0.12214606255292892, 'timestamp': '2025-10-01 04:21:20.585230', 'step': 4017, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:20.639019', 'step': 4017, 'epoch': 1} {'type': 'loss', 'content': 0.15206247568130493, 'timestamp': '2025-10-01 04:21:20.642197', 'step': 4018, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:20.696084', 'step': 4018, 'epoch': 1} {'type': 'loss', 'content': 0.1697380393743515, 'timestamp': '2025-10-01 04:21:20.699382', 'step': 4019, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:20.752743', 'step': 4019, 'epoch': 1} {'type': 'loss', 'content': 0.11266014724969864, 'timestamp': '2025-10-01 04:21:20.758560', 'step': 4020, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:20.811950', 'step': 4020, 'epoch': 1} {'type': 'loss', 'content': 0.16141115128993988, 'timestamp': '2025-10-01 04:21:20.814071', 'step': 4021, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:20.867206', 'step': 4021, 'epoch': 1} {'type': 'loss', 'content': 0.30819639563560486, 'timestamp': '2025-10-01 04:21:20.869402', 'step': 4022, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:20.922857', 'step': 4022, 'epoch': 1} {'type': 'loss', 'content': 0.12295133620500565, 'timestamp': '2025-10-01 04:21:20.925115', 'step': 4023, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:20.983246', 'step': 4023, 'epoch': 1} {'type': 'loss', 'content': 0.19249004125595093, 'timestamp': '2025-10-01 04:21:20.989286', 'step': 4024, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:21.041324', 'step': 4024, 'epoch': 1} {'type': 'loss', 'content': 0.13302567601203918, 'timestamp': '2025-10-01 04:21:21.043548', 'step': 4025, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:21.096854', 'step': 4025, 'epoch': 1} {'type': 'loss', 'content': 0.16010892391204834, 'timestamp': '2025-10-01 04:21:21.099139', 'step': 4026, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:21.152481', 'step': 4026, 'epoch': 1} {'type': 'loss', 'content': 0.1706400215625763, 'timestamp': '2025-10-01 04:21:21.154754', 'step': 4027, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:21.207702', 'step': 4027, 'epoch': 1} {'type': 'loss', 'content': 0.15908575057983398, 'timestamp': '2025-10-01 04:21:21.213476', 'step': 4028, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:21.265645', 'step': 4028, 'epoch': 1} {'type': 'loss', 'content': 0.20314227044582367, 'timestamp': '2025-10-01 04:21:21.267820', 'step': 4029, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:21.320798', 'step': 4029, 'epoch': 1} {'type': 'loss', 'content': 0.1433996558189392, 'timestamp': '2025-10-01 04:21:21.323192', 'step': 4030, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:21.376579', 'step': 4030, 'epoch': 1} {'type': 'loss', 'content': 0.1682722121477127, 'timestamp': '2025-10-01 04:21:21.379078', 'step': 4031, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:21.432879', 'step': 4031, 'epoch': 1} {'type': 'loss', 'content': 0.17457780241966248, 'timestamp': '2025-10-01 04:21:21.438798', 'step': 4032, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:21.499428', 'step': 4032, 'epoch': 1} {'type': 'loss', 'content': 0.1752343624830246, 'timestamp': '2025-10-01 04:21:21.501685', 'step': 4033, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:21.554717', 'step': 4033, 'epoch': 1} {'type': 'loss', 'content': 0.17950375378131866, 'timestamp': '2025-10-01 04:21:21.556901', 'step': 4034, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:21.610209', 'step': 4034, 'epoch': 1} {'type': 'loss', 'content': 0.1787317842245102, 'timestamp': '2025-10-01 04:21:21.612316', 'step': 4035, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:21:21.665316', 'step': 4035, 'epoch': 1} {'type': 'loss', 'content': 0.23888050019741058, 'timestamp': '2025-10-01 04:21:21.671419', 'step': 4036, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:21.723793', 'step': 4036, 'epoch': 1} {'type': 'loss', 'content': 0.18351610004901886, 'timestamp': '2025-10-01 04:21:21.726186', 'step': 4037, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:21.778825', 'step': 4037, 'epoch': 1} {'type': 'loss', 'content': 0.1970251351594925, 'timestamp': '2025-10-01 04:21:21.781077', 'step': 4038, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:21.835384', 'step': 4038, 'epoch': 1} {'type': 'loss', 'content': 0.1584470421075821, 'timestamp': '2025-10-01 04:21:21.837631', 'step': 4039, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:21.902648', 'step': 4039, 'epoch': 1} {'type': 'loss', 'content': 0.08785244822502136, 'timestamp': '2025-10-01 04:21:21.908314', 'step': 4040, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:21.963818', 'step': 4040, 'epoch': 1} {'type': 'loss', 'content': 0.1562519520521164, 'timestamp': '2025-10-01 04:21:21.966178', 'step': 4041, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:22.025569', 'step': 4041, 'epoch': 1} {'type': 'loss', 'content': 0.13934636116027832, 'timestamp': '2025-10-01 04:21:22.027845', 'step': 4042, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:22.080162', 'step': 4042, 'epoch': 1} {'type': 'loss', 'content': 0.20486190915107727, 'timestamp': '2025-10-01 04:21:22.082482', 'step': 4043, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:22.136547', 'step': 4043, 'epoch': 1} {'type': 'loss', 'content': 0.15456806123256683, 'timestamp': '2025-10-01 04:21:22.151918', 'step': 4044, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:22.204299', 'step': 4044, 'epoch': 1} {'type': 'loss', 'content': 0.15756890177726746, 'timestamp': '2025-10-01 04:21:22.216788', 'step': 4045, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:22.275919', 'step': 4045, 'epoch': 1} {'type': 'loss', 'content': 0.12099563330411911, 'timestamp': '2025-10-01 04:21:22.285337', 'step': 4046, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:22.340328', 'step': 4046, 'epoch': 1} {'type': 'loss', 'content': 0.14875726401805878, 'timestamp': '2025-10-01 04:21:22.342820', 'step': 4047, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:22.407988', 'step': 4047, 'epoch': 1} {'type': 'loss', 'content': 0.22730371356010437, 'timestamp': '2025-10-01 04:21:22.414031', 'step': 4048, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:22.467557', 'step': 4048, 'epoch': 1} {'type': 'loss', 'content': 0.13854612410068512, 'timestamp': '2025-10-01 04:21:22.471842', 'step': 4049, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:22.530463', 'step': 4049, 'epoch': 1} {'type': 'loss', 'content': 0.22353634238243103, 'timestamp': '2025-10-01 04:21:22.536568', 'step': 4050, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:22.589703', 'step': 4050, 'epoch': 1} {'type': 'loss', 'content': 0.1810145229101181, 'timestamp': '2025-10-01 04:21:22.592779', 'step': 4051, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:22.651709', 'step': 4051, 'epoch': 1} {'type': 'loss', 'content': 0.183266282081604, 'timestamp': '2025-10-01 04:21:22.657632', 'step': 4052, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:22.715618', 'step': 4052, 'epoch': 1} {'type': 'loss', 'content': 0.08336944133043289, 'timestamp': '2025-10-01 04:21:22.718192', 'step': 4053, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:22.773429', 'step': 4053, 'epoch': 1} {'type': 'loss', 'content': 0.23659229278564453, 'timestamp': '2025-10-01 04:21:22.775933', 'step': 4054, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:21:22.829336', 'step': 4054, 'epoch': 1} {'type': 'loss', 'content': 0.15202926099300385, 'timestamp': '2025-10-01 04:21:22.833089', 'step': 4055, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:22.893350', 'step': 4055, 'epoch': 1} {'type': 'loss', 'content': 0.1531529426574707, 'timestamp': '2025-10-01 04:21:22.905343', 'step': 4056, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:22.958448', 'step': 4056, 'epoch': 1} {'type': 'loss', 'content': 0.21610037982463837, 'timestamp': '2025-10-01 04:21:22.960710', 'step': 4057, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:23.014174', 'step': 4057, 'epoch': 1} {'type': 'loss', 'content': 0.22267387807369232, 'timestamp': '2025-10-01 04:21:23.016674', 'step': 4058, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:23.073657', 'step': 4058, 'epoch': 1} {'type': 'loss', 'content': 0.1843048334121704, 'timestamp': '2025-10-01 04:21:23.076359', 'step': 4059, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:23.132273', 'step': 4059, 'epoch': 1} {'type': 'loss', 'content': 0.2086903601884842, 'timestamp': '2025-10-01 04:21:23.138447', 'step': 4060, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:23.194204', 'step': 4060, 'epoch': 1} {'type': 'loss', 'content': 0.25082409381866455, 'timestamp': '2025-10-01 04:21:23.196494', 'step': 4061, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:23.252615', 'step': 4061, 'epoch': 1} {'type': 'loss', 'content': 0.16807670891284943, 'timestamp': '2025-10-01 04:21:23.255412', 'step': 4062, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:23.318713', 'step': 4062, 'epoch': 1} {'type': 'loss', 'content': 0.20630012452602386, 'timestamp': '2025-10-01 04:21:23.321971', 'step': 4063, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:23.376172', 'step': 4063, 'epoch': 1} {'type': 'loss', 'content': 0.2470157891511917, 'timestamp': '2025-10-01 04:21:23.382370', 'step': 4064, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:23.437196', 'step': 4064, 'epoch': 1} {'type': 'loss', 'content': 0.16994556784629822, 'timestamp': '2025-10-01 04:21:23.441088', 'step': 4065, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:23.494602', 'step': 4065, 'epoch': 1} {'type': 'loss', 'content': 0.15134532749652863, 'timestamp': '2025-10-01 04:21:23.497028', 'step': 4066, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:23.550786', 'step': 4066, 'epoch': 1} {'type': 'loss', 'content': 0.23157617449760437, 'timestamp': '2025-10-01 04:21:23.553109', 'step': 4067, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:23.606443', 'step': 4067, 'epoch': 1} {'type': 'loss', 'content': 0.11911019682884216, 'timestamp': '2025-10-01 04:21:23.614973', 'step': 4068, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:23.668026', 'step': 4068, 'epoch': 1} {'type': 'loss', 'content': 0.17536531388759613, 'timestamp': '2025-10-01 04:21:23.670256', 'step': 4069, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:21:23.728277', 'step': 4069, 'epoch': 1} {'type': 'loss', 'content': 0.14931480586528778, 'timestamp': '2025-10-01 04:21:23.733211', 'step': 4070, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:23.797455', 'step': 4070, 'epoch': 1} {'type': 'loss', 'content': 0.10295358300209045, 'timestamp': '2025-10-01 04:21:23.799663', 'step': 4071, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:23.859898', 'step': 4071, 'epoch': 1} {'type': 'loss', 'content': 0.15366093814373016, 'timestamp': '2025-10-01 04:21:23.865717', 'step': 4072, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:23.923807', 'step': 4072, 'epoch': 1} {'type': 'loss', 'content': 0.2753216028213501, 'timestamp': '2025-10-01 04:21:23.927017', 'step': 4073, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:23.984371', 'step': 4073, 'epoch': 1} {'type': 'loss', 'content': 0.09285182505846024, 'timestamp': '2025-10-01 04:21:23.989064', 'step': 4074, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:21:24.055378', 'step': 4074, 'epoch': 1} {'type': 'loss', 'content': 0.15967585146427155, 'timestamp': '2025-10-01 04:21:24.057697', 'step': 4075, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:24.111165', 'step': 4075, 'epoch': 1} {'type': 'loss', 'content': 0.17513087391853333, 'timestamp': '2025-10-01 04:21:24.117033', 'step': 4076, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:24.194828', 'step': 4076, 'epoch': 1} {'type': 'loss', 'content': 0.18363694846630096, 'timestamp': '2025-10-01 04:21:24.196992', 'step': 4077, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:24.254607', 'step': 4077, 'epoch': 1} {'type': 'loss', 'content': 0.1439172923564911, 'timestamp': '2025-10-01 04:21:24.262057', 'step': 4078, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:24.323544', 'step': 4078, 'epoch': 1} {'type': 'loss', 'content': 0.11169840395450592, 'timestamp': '2025-10-01 04:21:24.326297', 'step': 4079, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:24.393045', 'step': 4079, 'epoch': 1} {'type': 'loss', 'content': 0.2565900981426239, 'timestamp': '2025-10-01 04:21:24.398958', 'step': 4080, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:24.451181', 'step': 4080, 'epoch': 1} {'type': 'loss', 'content': 0.16411945223808289, 'timestamp': '2025-10-01 04:21:24.453448', 'step': 4081, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:24.514677', 'step': 4081, 'epoch': 1} {'type': 'loss', 'content': 0.20290552079677582, 'timestamp': '2025-10-01 04:21:24.516709', 'step': 4082, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:24.569861', 'step': 4082, 'epoch': 1} {'type': 'loss', 'content': 0.13452517986297607, 'timestamp': '2025-10-01 04:21:24.572051', 'step': 4083, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:24.624916', 'step': 4083, 'epoch': 1} {'type': 'loss', 'content': 0.18032941222190857, 'timestamp': '2025-10-01 04:21:24.640114', 'step': 4084, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:24.691839', 'step': 4084, 'epoch': 1} {'type': 'loss', 'content': 0.19864216446876526, 'timestamp': '2025-10-01 04:21:24.694454', 'step': 4085, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:24.751307', 'step': 4085, 'epoch': 1} {'type': 'loss', 'content': 0.136973574757576, 'timestamp': '2025-10-01 04:21:24.753671', 'step': 4086, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:24.806734', 'step': 4086, 'epoch': 1} {'type': 'loss', 'content': 0.16607090830802917, 'timestamp': '2025-10-01 04:21:24.809043', 'step': 4087, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:24.863613', 'step': 4087, 'epoch': 1} {'type': 'loss', 'content': 0.19432973861694336, 'timestamp': '2025-10-01 04:21:24.869587', 'step': 4088, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:24.922434', 'step': 4088, 'epoch': 1} {'type': 'loss', 'content': 0.10285821557044983, 'timestamp': '2025-10-01 04:21:24.924605', 'step': 4089, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:24.977850', 'step': 4089, 'epoch': 1} {'type': 'loss', 'content': 0.05182043835520744, 'timestamp': '2025-10-01 04:21:24.980109', 'step': 4090, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:25.033473', 'step': 4090, 'epoch': 1} {'type': 'loss', 'content': 0.12194348871707916, 'timestamp': '2025-10-01 04:21:25.035696', 'step': 4091, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:21:25.089277', 'step': 4091, 'epoch': 1} {'type': 'loss', 'content': 0.12790623307228088, 'timestamp': '2025-10-01 04:21:25.094917', 'step': 4092, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:25.147037', 'step': 4092, 'epoch': 1} {'type': 'loss', 'content': 0.17883561551570892, 'timestamp': '2025-10-01 04:21:25.149256', 'step': 4093, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:25.201906', 'step': 4093, 'epoch': 1} {'type': 'loss', 'content': 0.1308758705854416, 'timestamp': '2025-10-01 04:21:25.204556', 'step': 4094, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:25.257422', 'step': 4094, 'epoch': 1} {'type': 'loss', 'content': 0.18096163868904114, 'timestamp': '2025-10-01 04:21:25.259883', 'step': 4095, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:25.312837', 'step': 4095, 'epoch': 1} {'type': 'loss', 'content': 0.20913149416446686, 'timestamp': '2025-10-01 04:21:25.319015', 'step': 4096, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:25.371475', 'step': 4096, 'epoch': 1} {'type': 'loss', 'content': 0.13570626080036163, 'timestamp': '2025-10-01 04:21:25.373774', 'step': 4097, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:21:25.427339', 'step': 4097, 'epoch': 1} {'type': 'loss', 'content': 0.1462157964706421, 'timestamp': '2025-10-01 04:21:25.429561', 'step': 4098, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:25.482708', 'step': 4098, 'epoch': 1} {'type': 'loss', 'content': 0.24050277471542358, 'timestamp': '2025-10-01 04:21:25.485284', 'step': 4099, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:25.538541', 'step': 4099, 'epoch': 1} {'type': 'loss', 'content': 0.16305479407310486, 'timestamp': '2025-10-01 04:21:25.544372', 'step': 4100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:25.597034', 'step': 4100, 'epoch': 1} {'type': 'loss', 'content': 0.1920921802520752, 'timestamp': '2025-10-01 04:21:25.599591', 'step': 4101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:25.652726', 'step': 4101, 'epoch': 1} {'type': 'loss', 'content': 0.15902742743492126, 'timestamp': '2025-10-01 04:21:25.655162', 'step': 4102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:25.708585', 'step': 4102, 'epoch': 1} {'type': 'loss', 'content': 0.24954812228679657, 'timestamp': '2025-10-01 04:21:25.710948', 'step': 4103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:25.764639', 'step': 4103, 'epoch': 1} {'type': 'loss', 'content': 0.12308573722839355, 'timestamp': '2025-10-01 04:21:25.770455', 'step': 4104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:25.831884', 'step': 4104, 'epoch': 1} {'type': 'loss', 'content': 0.08627395331859589, 'timestamp': '2025-10-01 04:21:25.834110', 'step': 4105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:25.891315', 'step': 4105, 'epoch': 1} {'type': 'loss', 'content': 0.17386995255947113, 'timestamp': '2025-10-01 04:21:25.893408', 'step': 4106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:25.947108', 'step': 4106, 'epoch': 1} {'type': 'loss', 'content': 0.20835919678211212, 'timestamp': '2025-10-01 04:21:25.957067', 'step': 4107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:26.011438', 'step': 4107, 'epoch': 1} {'type': 'loss', 'content': 0.18773798644542694, 'timestamp': '2025-10-01 04:21:26.017146', 'step': 4108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:26.069197', 'step': 4108, 'epoch': 1} {'type': 'loss', 'content': 0.1747835874557495, 'timestamp': '2025-10-01 04:21:26.071895', 'step': 4109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:26.125226', 'step': 4109, 'epoch': 1} {'type': 'loss', 'content': 0.12678276002407074, 'timestamp': '2025-10-01 04:21:26.127441', 'step': 4110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:26.180741', 'step': 4110, 'epoch': 1} {'type': 'loss', 'content': 0.2954025864601135, 'timestamp': '2025-10-01 04:21:26.182921', 'step': 4111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:26.242423', 'step': 4111, 'epoch': 1} {'type': 'loss', 'content': 0.1220197007060051, 'timestamp': '2025-10-01 04:21:26.247980', 'step': 4112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:26.300759', 'step': 4112, 'epoch': 1} {'type': 'loss', 'content': 0.18286655843257904, 'timestamp': '2025-10-01 04:21:26.302789', 'step': 4113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:21:26.367757', 'step': 4113, 'epoch': 1} {'type': 'loss', 'content': 0.1455436795949936, 'timestamp': '2025-10-01 04:21:26.374784', 'step': 4114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:26.429706', 'step': 4114, 'epoch': 1} {'type': 'loss', 'content': 0.13187910616397858, 'timestamp': '2025-10-01 04:21:26.431943', 'step': 4115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:21:26.484839', 'step': 4115, 'epoch': 1} {'type': 'loss', 'content': 0.08574825525283813, 'timestamp': '2025-10-01 04:21:26.490826', 'step': 4116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:26.551478', 'step': 4116, 'epoch': 1} {'type': 'loss', 'content': 0.15362760424613953, 'timestamp': '2025-10-01 04:21:26.553864', 'step': 4117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:26.607242', 'step': 4117, 'epoch': 1} {'type': 'loss', 'content': 0.12786751985549927, 'timestamp': '2025-10-01 04:21:26.609446', 'step': 4118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:26.679099', 'step': 4118, 'epoch': 1} {'type': 'loss', 'content': 0.1862012892961502, 'timestamp': '2025-10-01 04:21:26.681410', 'step': 4119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:26.734275', 'step': 4119, 'epoch': 1} {'type': 'loss', 'content': 0.16488981246948242, 'timestamp': '2025-10-01 04:21:26.740108', 'step': 4120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:26.796308', 'step': 4120, 'epoch': 1} {'type': 'loss', 'content': 0.2165474146604538, 'timestamp': '2025-10-01 04:21:26.799128', 'step': 4121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:26.852051', 'step': 4121, 'epoch': 1} {'type': 'loss', 'content': 0.14596286416053772, 'timestamp': '2025-10-01 04:21:26.854146', 'step': 4122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:26.907543', 'step': 4122, 'epoch': 1} {'type': 'loss', 'content': 0.23198570311069489, 'timestamp': '2025-10-01 04:21:26.924096', 'step': 4123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:26.977701', 'step': 4123, 'epoch': 1} {'type': 'loss', 'content': 0.17405477166175842, 'timestamp': '2025-10-01 04:21:26.983434', 'step': 4124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:27.036182', 'step': 4124, 'epoch': 1} {'type': 'loss', 'content': 0.1972193866968155, 'timestamp': '2025-10-01 04:21:27.038435', 'step': 4125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:27.100412', 'step': 4125, 'epoch': 1} {'type': 'loss', 'content': 0.232780322432518, 'timestamp': '2025-10-01 04:21:27.105906', 'step': 4126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:27.159438', 'step': 4126, 'epoch': 1} {'type': 'loss', 'content': 0.11900955438613892, 'timestamp': '2025-10-01 04:21:27.163884', 'step': 4127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:21:27.216499', 'step': 4127, 'epoch': 1} {'type': 'loss', 'content': 0.14072588086128235, 'timestamp': '2025-10-01 04:21:27.228039', 'step': 4128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:27.281735', 'step': 4128, 'epoch': 1} {'type': 'loss', 'content': 0.10684078186750412, 'timestamp': '2025-10-01 04:21:27.287537', 'step': 4129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:27.353728', 'step': 4129, 'epoch': 1} {'type': 'loss', 'content': 0.1761329174041748, 'timestamp': '2025-10-01 04:21:27.355894', 'step': 4130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:27.409153', 'step': 4130, 'epoch': 1} {'type': 'loss', 'content': 0.2266266942024231, 'timestamp': '2025-10-01 04:21:27.411357', 'step': 4131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:27.468447', 'step': 4131, 'epoch': 1} {'type': 'loss', 'content': 0.13815538585186005, 'timestamp': '2025-10-01 04:21:27.479695', 'step': 4132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:27.536498', 'step': 4132, 'epoch': 1} {'type': 'loss', 'content': 0.2315506786108017, 'timestamp': '2025-10-01 04:21:27.540694', 'step': 4133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:27.594112', 'step': 4133, 'epoch': 1} {'type': 'loss', 'content': 0.18882925808429718, 'timestamp': '2025-10-01 04:21:27.596903', 'step': 4134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:27.656409', 'step': 4134, 'epoch': 1} {'type': 'loss', 'content': 0.17524400353431702, 'timestamp': '2025-10-01 04:21:27.659135', 'step': 4135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:27.726351', 'step': 4135, 'epoch': 1} {'type': 'loss', 'content': 0.17794962227344513, 'timestamp': '2025-10-01 04:21:27.733906', 'step': 4136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:27.790858', 'step': 4136, 'epoch': 1} {'type': 'loss', 'content': 0.10201393067836761, 'timestamp': '2025-10-01 04:21:27.793298', 'step': 4137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:27.847568', 'step': 4137, 'epoch': 1} {'type': 'loss', 'content': 0.20700964331626892, 'timestamp': '2025-10-01 04:21:27.849783', 'step': 4138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:27.907134', 'step': 4138, 'epoch': 1} {'type': 'loss', 'content': 0.19086185097694397, 'timestamp': '2025-10-01 04:21:27.910188', 'step': 4139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:27.966467', 'step': 4139, 'epoch': 1} {'type': 'loss', 'content': 0.1260841339826584, 'timestamp': '2025-10-01 04:21:27.972550', 'step': 4140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:28.026755', 'step': 4140, 'epoch': 1} {'type': 'loss', 'content': 0.14363978803157806, 'timestamp': '2025-10-01 04:21:28.028829', 'step': 4141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:28.087684', 'step': 4141, 'epoch': 1} {'type': 'loss', 'content': 0.09371504932641983, 'timestamp': '2025-10-01 04:21:28.096565', 'step': 4142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:28.151577', 'step': 4142, 'epoch': 1} {'type': 'loss', 'content': 0.14421331882476807, 'timestamp': '2025-10-01 04:21:28.153839', 'step': 4143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:28.206484', 'step': 4143, 'epoch': 1} {'type': 'loss', 'content': 0.19365191459655762, 'timestamp': '2025-10-01 04:21:28.212429', 'step': 4144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:28.265403', 'step': 4144, 'epoch': 1} {'type': 'loss', 'content': 0.1688532531261444, 'timestamp': '2025-10-01 04:21:28.268169', 'step': 4145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:28.322058', 'step': 4145, 'epoch': 1} {'type': 'loss', 'content': 0.2110077291727066, 'timestamp': '2025-10-01 04:21:28.324780', 'step': 4146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:21:28.380620', 'step': 4146, 'epoch': 1} {'type': 'loss', 'content': 0.1969524472951889, 'timestamp': '2025-10-01 04:21:28.383090', 'step': 4147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:28.437015', 'step': 4147, 'epoch': 1} {'type': 'loss', 'content': 0.11597386747598648, 'timestamp': '2025-10-01 04:21:28.442759', 'step': 4148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:21:28.495203', 'step': 4148, 'epoch': 1} {'type': 'loss', 'content': 0.09412924945354462, 'timestamp': '2025-10-01 04:21:28.497401', 'step': 4149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:28.550637', 'step': 4149, 'epoch': 1} {'type': 'loss', 'content': 0.1955631971359253, 'timestamp': '2025-10-01 04:21:28.552829', 'step': 4150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:28.605847', 'step': 4150, 'epoch': 1} {'type': 'loss', 'content': 0.17064721882343292, 'timestamp': '2025-10-01 04:21:28.608001', 'step': 4151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:28.660909', 'step': 4151, 'epoch': 1} {'type': 'loss', 'content': 0.10016772150993347, 'timestamp': '2025-10-01 04:21:28.666684', 'step': 4152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:28.719649', 'step': 4152, 'epoch': 1} {'type': 'loss', 'content': 0.18591538071632385, 'timestamp': '2025-10-01 04:21:28.723382', 'step': 4153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:28.775691', 'step': 4153, 'epoch': 1} {'type': 'loss', 'content': 0.20397599041461945, 'timestamp': '2025-10-01 04:21:28.778125', 'step': 4154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:28.831387', 'step': 4154, 'epoch': 1} {'type': 'loss', 'content': 0.07419165223836899, 'timestamp': '2025-10-01 04:21:28.834004', 'step': 4155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:28.887564', 'step': 4155, 'epoch': 1} {'type': 'loss', 'content': 0.17607218027114868, 'timestamp': '2025-10-01 04:21:28.893699', 'step': 4156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:28.947691', 'step': 4156, 'epoch': 1} {'type': 'loss', 'content': 0.20919004082679749, 'timestamp': '2025-10-01 04:21:28.950095', 'step': 4157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:21:29.005985', 'step': 4157, 'epoch': 1} {'type': 'loss', 'content': 0.16422024369239807, 'timestamp': '2025-10-01 04:21:29.008842', 'step': 4158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:29.061767', 'step': 4158, 'epoch': 1} {'type': 'loss', 'content': 0.1988997906446457, 'timestamp': '2025-10-01 04:21:29.064016', 'step': 4159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:29.117327', 'step': 4159, 'epoch': 1} {'type': 'loss', 'content': 0.21532224118709564, 'timestamp': '2025-10-01 04:21:29.123329', 'step': 4160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:29.175881', 'step': 4160, 'epoch': 1} {'type': 'loss', 'content': 0.14051547646522522, 'timestamp': '2025-10-01 04:21:29.178120', 'step': 4161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:29.231336', 'step': 4161, 'epoch': 1} {'type': 'loss', 'content': 0.18916137516498566, 'timestamp': '2025-10-01 04:21:29.233660', 'step': 4162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:29.287101', 'step': 4162, 'epoch': 1} {'type': 'loss', 'content': 0.1397497057914734, 'timestamp': '2025-10-01 04:21:29.289420', 'step': 4163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:21:29.350002', 'step': 4163, 'epoch': 1} {'type': 'loss', 'content': 0.09503088146448135, 'timestamp': '2025-10-01 04:21:29.355894', 'step': 4164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:29.408605', 'step': 4164, 'epoch': 1} {'type': 'loss', 'content': 0.14461112022399902, 'timestamp': '2025-10-01 04:21:29.410673', 'step': 4165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:29.463649', 'step': 4165, 'epoch': 1} {'type': 'loss', 'content': 0.156693696975708, 'timestamp': '2025-10-01 04:21:29.466026', 'step': 4166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:29.518666', 'step': 4166, 'epoch': 1} {'type': 'loss', 'content': 0.16525326669216156, 'timestamp': '2025-10-01 04:21:29.526521', 'step': 4167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:29.584634', 'step': 4167, 'epoch': 1} {'type': 'loss', 'content': 0.11639948189258575, 'timestamp': '2025-10-01 04:21:29.593934', 'step': 4168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:21:29.646898', 'step': 4168, 'epoch': 1} {'type': 'loss', 'content': 0.11105609685182571, 'timestamp': '2025-10-01 04:21:29.649039', 'step': 4169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:29.702535', 'step': 4169, 'epoch': 1} {'type': 'loss', 'content': 0.12225988507270813, 'timestamp': '2025-10-01 04:21:29.704706', 'step': 4170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:29.758556', 'step': 4170, 'epoch': 1} {'type': 'loss', 'content': 0.13911527395248413, 'timestamp': '2025-10-01 04:21:29.760845', 'step': 4171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:29.816274', 'step': 4171, 'epoch': 1} {'type': 'loss', 'content': 0.147738516330719, 'timestamp': '2025-10-01 04:21:29.826618', 'step': 4172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:29.882246', 'step': 4172, 'epoch': 1} {'type': 'loss', 'content': 0.1608598530292511, 'timestamp': '2025-10-01 04:21:29.884553', 'step': 4173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:29.938017', 'step': 4173, 'epoch': 1} {'type': 'loss', 'content': 0.10870124399662018, 'timestamp': '2025-10-01 04:21:29.940669', 'step': 4174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:29.994562', 'step': 4174, 'epoch': 1} {'type': 'loss', 'content': 0.10622572153806686, 'timestamp': '2025-10-01 04:21:29.997150', 'step': 4175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:30.051656', 'step': 4175, 'epoch': 1} {'type': 'loss', 'content': 0.0968601256608963, 'timestamp': '2025-10-01 04:21:30.057994', 'step': 4176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:30.111231', 'step': 4176, 'epoch': 1} {'type': 'loss', 'content': 0.17359937727451324, 'timestamp': '2025-10-01 04:21:30.113330', 'step': 4177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:30.166283', 'step': 4177, 'epoch': 1} {'type': 'loss', 'content': 0.1597490906715393, 'timestamp': '2025-10-01 04:21:30.168796', 'step': 4178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:30.225930', 'step': 4178, 'epoch': 1} {'type': 'loss', 'content': 0.21836169064044952, 'timestamp': '2025-10-01 04:21:30.228220', 'step': 4179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:30.281027', 'step': 4179, 'epoch': 1} {'type': 'loss', 'content': 0.14617542922496796, 'timestamp': '2025-10-01 04:21:30.289746', 'step': 4180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:30.343936', 'step': 4180, 'epoch': 1} {'type': 'loss', 'content': 0.05467535927891731, 'timestamp': '2025-10-01 04:21:30.345925', 'step': 4181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:30.410009', 'step': 4181, 'epoch': 1} {'type': 'loss', 'content': 0.14949791133403778, 'timestamp': '2025-10-01 04:21:30.413603', 'step': 4182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:30.467238', 'step': 4182, 'epoch': 1} {'type': 'loss', 'content': 0.223349466919899, 'timestamp': '2025-10-01 04:21:30.471267', 'step': 4183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:30.523939', 'step': 4183, 'epoch': 1} {'type': 'loss', 'content': 0.12421822547912598, 'timestamp': '2025-10-01 04:21:30.531070', 'step': 4184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:30.589881', 'step': 4184, 'epoch': 1} {'type': 'loss', 'content': 0.15375760197639465, 'timestamp': '2025-10-01 04:21:30.592073', 'step': 4185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:30.644764', 'step': 4185, 'epoch': 1} {'type': 'loss', 'content': 0.112287238240242, 'timestamp': '2025-10-01 04:21:30.647000', 'step': 4186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:30.700566', 'step': 4186, 'epoch': 1} {'type': 'loss', 'content': 0.20893070101737976, 'timestamp': '2025-10-01 04:21:30.702845', 'step': 4187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:30.771811', 'step': 4187, 'epoch': 1} {'type': 'loss', 'content': 0.1120159700512886, 'timestamp': '2025-10-01 04:21:30.777763', 'step': 4188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:30.829733', 'step': 4188, 'epoch': 1} {'type': 'loss', 'content': 0.13713906705379486, 'timestamp': '2025-10-01 04:21:30.831899', 'step': 4189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:30.889916', 'step': 4189, 'epoch': 1} {'type': 'loss', 'content': 0.25129324197769165, 'timestamp': '2025-10-01 04:21:30.892278', 'step': 4190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:30.945987', 'step': 4190, 'epoch': 1} {'type': 'loss', 'content': 0.11434633284807205, 'timestamp': '2025-10-01 04:21:30.948066', 'step': 4191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:31.001383', 'step': 4191, 'epoch': 1} {'type': 'loss', 'content': 0.1452975869178772, 'timestamp': '2025-10-01 04:21:31.007360', 'step': 4192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:31.059438', 'step': 4192, 'epoch': 1} {'type': 'loss', 'content': 0.17293542623519897, 'timestamp': '2025-10-01 04:21:31.061789', 'step': 4193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:21:31.114218', 'step': 4193, 'epoch': 1} {'type': 'loss', 'content': 0.23895129561424255, 'timestamp': '2025-10-01 04:21:31.116518', 'step': 4194, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-01 04:21:44.100374', 'step': 4194, 'epoch': 1} {'type': 'pplx', 'content': 15126.135876360722, 'timestamp': '2025-10-01 04:21:44.103174', 'step': 4194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:44.156665', 'step': 4194, 'epoch': 1} {'type': 'loss', 'content': 0.19060547649860382, 'timestamp': '2025-10-01 04:21:44.159827', 'step': 4195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:44.215297', 'step': 4195, 'epoch': 1} {'type': 'loss', 'content': 0.12234939634799957, 'timestamp': '2025-10-01 04:21:44.221147', 'step': 4196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:44.274130', 'step': 4196, 'epoch': 1} {'type': 'loss', 'content': 0.13258299231529236, 'timestamp': '2025-10-01 04:21:44.276109', 'step': 4197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:44.329373', 'step': 4197, 'epoch': 1} {'type': 'loss', 'content': 0.14263075590133667, 'timestamp': '2025-10-01 04:21:44.331626', 'step': 4198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:44.392144', 'step': 4198, 'epoch': 1} {'type': 'loss', 'content': 0.13224577903747559, 'timestamp': '2025-10-01 04:21:44.395280', 'step': 4199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:44.460460', 'step': 4199, 'epoch': 1} {'type': 'loss', 'content': 0.18115553259849548, 'timestamp': '2025-10-01 04:21:44.465715', 'step': 4200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:44.518745', 'step': 4200, 'epoch': 1} {'type': 'loss', 'content': 0.11147661507129669, 'timestamp': '2025-10-01 04:21:44.520500', 'step': 4201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:44.573805', 'step': 4201, 'epoch': 1} {'type': 'loss', 'content': 0.23224005103111267, 'timestamp': '2025-10-01 04:21:44.575700', 'step': 4202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:44.634925', 'step': 4202, 'epoch': 1} {'type': 'loss', 'content': 0.20834438502788544, 'timestamp': '2025-10-01 04:21:44.638365', 'step': 4203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:44.692156', 'step': 4203, 'epoch': 1} {'type': 'loss', 'content': 0.274040162563324, 'timestamp': '2025-10-01 04:21:44.698138', 'step': 4204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:44.773247', 'step': 4204, 'epoch': 1} {'type': 'loss', 'content': 0.13042379915714264, 'timestamp': '2025-10-01 04:21:44.775209', 'step': 4205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:44.832919', 'step': 4205, 'epoch': 1} {'type': 'loss', 'content': 0.16196471452713013, 'timestamp': '2025-10-01 04:21:44.834620', 'step': 4206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:44.887730', 'step': 4206, 'epoch': 1} {'type': 'loss', 'content': 0.1565215289592743, 'timestamp': '2025-10-01 04:21:44.892336', 'step': 4207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:44.947542', 'step': 4207, 'epoch': 1} {'type': 'loss', 'content': 0.1588546484708786, 'timestamp': '2025-10-01 04:21:44.953126', 'step': 4208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:45.013366', 'step': 4208, 'epoch': 1} {'type': 'loss', 'content': 0.20893427729606628, 'timestamp': '2025-10-01 04:21:45.016145', 'step': 4209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:45.070258', 'step': 4209, 'epoch': 1} {'type': 'loss', 'content': 0.20481130480766296, 'timestamp': '2025-10-01 04:21:45.072965', 'step': 4210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:45.127298', 'step': 4210, 'epoch': 1} {'type': 'loss', 'content': 0.12705937027931213, 'timestamp': '2025-10-01 04:21:45.129436', 'step': 4211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:45.183414', 'step': 4211, 'epoch': 1} {'type': 'loss', 'content': 0.28231823444366455, 'timestamp': '2025-10-01 04:21:45.189575', 'step': 4212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:45.244116', 'step': 4212, 'epoch': 1} {'type': 'loss', 'content': 0.13381026685237885, 'timestamp': '2025-10-01 04:21:45.246296', 'step': 4213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:45.314561', 'step': 4213, 'epoch': 1} {'type': 'loss', 'content': 0.17061232030391693, 'timestamp': '2025-10-01 04:21:45.316492', 'step': 4214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:45.370680', 'step': 4214, 'epoch': 1} {'type': 'loss', 'content': 0.19463655352592468, 'timestamp': '2025-10-01 04:21:45.375013', 'step': 4215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:45.429332', 'step': 4215, 'epoch': 1} {'type': 'loss', 'content': 0.20018070936203003, 'timestamp': '2025-10-01 04:21:45.436258', 'step': 4216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:45.489364', 'step': 4216, 'epoch': 1} {'type': 'loss', 'content': 0.17392398416996002, 'timestamp': '2025-10-01 04:21:45.493301', 'step': 4217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:45.562091', 'step': 4217, 'epoch': 1} {'type': 'loss', 'content': 0.20019149780273438, 'timestamp': '2025-10-01 04:21:45.564267', 'step': 4218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:45.620458', 'step': 4218, 'epoch': 1} {'type': 'loss', 'content': 0.15622644126415253, 'timestamp': '2025-10-01 04:21:45.623040', 'step': 4219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:21:45.696980', 'step': 4219, 'epoch': 1} {'type': 'loss', 'content': 0.1372634321451187, 'timestamp': '2025-10-01 04:21:45.710760', 'step': 4220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:45.763794', 'step': 4220, 'epoch': 1} {'type': 'loss', 'content': 0.10372229665517807, 'timestamp': '2025-10-01 04:21:45.772665', 'step': 4221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:45.831985', 'step': 4221, 'epoch': 1} {'type': 'loss', 'content': 0.11803840100765228, 'timestamp': '2025-10-01 04:21:45.835223', 'step': 4222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:45.890365', 'step': 4222, 'epoch': 1} {'type': 'loss', 'content': 0.1820746511220932, 'timestamp': '2025-10-01 04:21:45.893965', 'step': 4223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:45.961950', 'step': 4223, 'epoch': 1} {'type': 'loss', 'content': 0.19324524700641632, 'timestamp': '2025-10-01 04:21:45.968152', 'step': 4224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:46.021803', 'step': 4224, 'epoch': 1} {'type': 'loss', 'content': 0.138607919216156, 'timestamp': '2025-10-01 04:21:46.024063', 'step': 4225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:46.078569', 'step': 4225, 'epoch': 1} {'type': 'loss', 'content': 0.22122366726398468, 'timestamp': '2025-10-01 04:21:46.089065', 'step': 4226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:46.143310', 'step': 4226, 'epoch': 1} {'type': 'loss', 'content': 0.26431840658187866, 'timestamp': '2025-10-01 04:21:46.145657', 'step': 4227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:46.199548', 'step': 4227, 'epoch': 1} {'type': 'loss', 'content': 0.17680852115154266, 'timestamp': '2025-10-01 04:21:46.205787', 'step': 4228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:46.258578', 'step': 4228, 'epoch': 1} {'type': 'loss', 'content': 0.07362335175275803, 'timestamp': '2025-10-01 04:21:46.261538', 'step': 4229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:46.315895', 'step': 4229, 'epoch': 1} {'type': 'loss', 'content': 0.14063625037670135, 'timestamp': '2025-10-01 04:21:46.323088', 'step': 4230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:46.376586', 'step': 4230, 'epoch': 1} {'type': 'loss', 'content': 0.11001625657081604, 'timestamp': '2025-10-01 04:21:46.378845', 'step': 4231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:46.435169', 'step': 4231, 'epoch': 1} {'type': 'loss', 'content': 0.16296325623989105, 'timestamp': '2025-10-01 04:21:46.442463', 'step': 4232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:46.497731', 'step': 4232, 'epoch': 1} {'type': 'loss', 'content': 0.19172564148902893, 'timestamp': '2025-10-01 04:21:46.499912', 'step': 4233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:46.556960', 'step': 4233, 'epoch': 1} {'type': 'loss', 'content': 0.2600758671760559, 'timestamp': '2025-10-01 04:21:46.563336', 'step': 4234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:46.618298', 'step': 4234, 'epoch': 1} {'type': 'loss', 'content': 0.29807573556900024, 'timestamp': '2025-10-01 04:21:46.620957', 'step': 4235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:46.676340', 'step': 4235, 'epoch': 1} {'type': 'loss', 'content': 0.2133098989725113, 'timestamp': '2025-10-01 04:21:46.682818', 'step': 4236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:46.737522', 'step': 4236, 'epoch': 1} {'type': 'loss', 'content': 0.1937689632177353, 'timestamp': '2025-10-01 04:21:46.740483', 'step': 4237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:46.799459', 'step': 4237, 'epoch': 1} {'type': 'loss', 'content': 0.15056581795215607, 'timestamp': '2025-10-01 04:21:46.801702', 'step': 4238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:46.858385', 'step': 4238, 'epoch': 1} {'type': 'loss', 'content': 0.13916581869125366, 'timestamp': '2025-10-01 04:21:46.860646', 'step': 4239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:46.916194', 'step': 4239, 'epoch': 1} {'type': 'loss', 'content': 0.0671973004937172, 'timestamp': '2025-10-01 04:21:46.922855', 'step': 4240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:46.976440', 'step': 4240, 'epoch': 1} {'type': 'loss', 'content': 0.11815836280584335, 'timestamp': '2025-10-01 04:21:46.978706', 'step': 4241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:47.033067', 'step': 4241, 'epoch': 1} {'type': 'loss', 'content': 0.11444507539272308, 'timestamp': '2025-10-01 04:21:47.035367', 'step': 4242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:47.089587', 'step': 4242, 'epoch': 1} {'type': 'loss', 'content': 0.2248227447271347, 'timestamp': '2025-10-01 04:21:47.091647', 'step': 4243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:47.145991', 'step': 4243, 'epoch': 1} {'type': 'loss', 'content': 0.22028964757919312, 'timestamp': '2025-10-01 04:21:47.152039', 'step': 4244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:47.205348', 'step': 4244, 'epoch': 1} {'type': 'loss', 'content': 0.1269291490316391, 'timestamp': '2025-10-01 04:21:47.207481', 'step': 4245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:47.267802', 'step': 4245, 'epoch': 1} {'type': 'loss', 'content': 0.17164872586727142, 'timestamp': '2025-10-01 04:21:47.269846', 'step': 4246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:47.322854', 'step': 4246, 'epoch': 1} {'type': 'loss', 'content': 0.2808012068271637, 'timestamp': '2025-10-01 04:21:47.324983', 'step': 4247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:47.378032', 'step': 4247, 'epoch': 1} {'type': 'loss', 'content': 0.17606554925441742, 'timestamp': '2025-10-01 04:21:47.384013', 'step': 4248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:47.436978', 'step': 4248, 'epoch': 1} {'type': 'loss', 'content': 0.24931660294532776, 'timestamp': '2025-10-01 04:21:47.439077', 'step': 4249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:47.493542', 'step': 4249, 'epoch': 1} {'type': 'loss', 'content': 0.28372371196746826, 'timestamp': '2025-10-01 04:21:47.495632', 'step': 4250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:47.549326', 'step': 4250, 'epoch': 1} {'type': 'loss', 'content': 0.18955254554748535, 'timestamp': '2025-10-01 04:21:47.551408', 'step': 4251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:47.608295', 'step': 4251, 'epoch': 1} {'type': 'loss', 'content': 0.13414081931114197, 'timestamp': '2025-10-01 04:21:47.614147', 'step': 4252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:47.666517', 'step': 4252, 'epoch': 1} {'type': 'loss', 'content': 0.13403978943824768, 'timestamp': '2025-10-01 04:21:47.669658', 'step': 4253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:47.723657', 'step': 4253, 'epoch': 1} {'type': 'loss', 'content': 0.19729353487491608, 'timestamp': '2025-10-01 04:21:47.725840', 'step': 4254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:47.783386', 'step': 4254, 'epoch': 1} {'type': 'loss', 'content': 0.1486431509256363, 'timestamp': '2025-10-01 04:21:47.787071', 'step': 4255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:21:47.840394', 'step': 4255, 'epoch': 1} {'type': 'loss', 'content': 0.12330441921949387, 'timestamp': '2025-10-01 04:21:47.846743', 'step': 4256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:21:47.901006', 'step': 4256, 'epoch': 1} {'type': 'loss', 'content': 0.22881242632865906, 'timestamp': '2025-10-01 04:21:47.904476', 'step': 4257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:47.958162', 'step': 4257, 'epoch': 1} {'type': 'loss', 'content': 0.28587955236434937, 'timestamp': '2025-10-01 04:21:47.960288', 'step': 4258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:48.018739', 'step': 4258, 'epoch': 1} {'type': 'loss', 'content': 0.16179145872592926, 'timestamp': '2025-10-01 04:21:48.020877', 'step': 4259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:48.074531', 'step': 4259, 'epoch': 1} {'type': 'loss', 'content': 0.1360243856906891, 'timestamp': '2025-10-01 04:21:48.080406', 'step': 4260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:48.133374', 'step': 4260, 'epoch': 1} {'type': 'loss', 'content': 0.18940046429634094, 'timestamp': '2025-10-01 04:21:48.136870', 'step': 4261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:48.190990', 'step': 4261, 'epoch': 1} {'type': 'loss', 'content': 0.1584368646144867, 'timestamp': '2025-10-01 04:21:48.193691', 'step': 4262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:48.250193', 'step': 4262, 'epoch': 1} {'type': 'loss', 'content': 0.19082066416740417, 'timestamp': '2025-10-01 04:21:48.254036', 'step': 4263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:48.308170', 'step': 4263, 'epoch': 1} {'type': 'loss', 'content': 0.08818208426237106, 'timestamp': '2025-10-01 04:21:48.314101', 'step': 4264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:48.368608', 'step': 4264, 'epoch': 1} {'type': 'loss', 'content': 0.13031242787837982, 'timestamp': '2025-10-01 04:21:48.378737', 'step': 4265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:48.433640', 'step': 4265, 'epoch': 1} {'type': 'loss', 'content': 0.209524005651474, 'timestamp': '2025-10-01 04:21:48.436322', 'step': 4266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:21:48.491546', 'step': 4266, 'epoch': 1} {'type': 'loss', 'content': 0.14945054054260254, 'timestamp': '2025-10-01 04:21:48.493926', 'step': 4267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:48.548828', 'step': 4267, 'epoch': 1} {'type': 'loss', 'content': 0.19890336692333221, 'timestamp': '2025-10-01 04:21:48.555005', 'step': 4268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:48.609369', 'step': 4268, 'epoch': 1} {'type': 'loss', 'content': 0.219970241189003, 'timestamp': '2025-10-01 04:21:48.611731', 'step': 4269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:48.665985', 'step': 4269, 'epoch': 1} {'type': 'loss', 'content': 0.09631817042827606, 'timestamp': '2025-10-01 04:21:48.668304', 'step': 4270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:48.723014', 'step': 4270, 'epoch': 1} {'type': 'loss', 'content': 0.16502931714057922, 'timestamp': '2025-10-01 04:21:48.725592', 'step': 4271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:48.779483', 'step': 4271, 'epoch': 1} {'type': 'loss', 'content': 0.13009008765220642, 'timestamp': '2025-10-01 04:21:48.785766', 'step': 4272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:48.840100', 'step': 4272, 'epoch': 1} {'type': 'loss', 'content': 0.17766346037387848, 'timestamp': '2025-10-01 04:21:48.842488', 'step': 4273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:48.906241', 'step': 4273, 'epoch': 1} {'type': 'loss', 'content': 0.23454472422599792, 'timestamp': '2025-10-01 04:21:48.908385', 'step': 4274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:48.962862', 'step': 4274, 'epoch': 1} {'type': 'loss', 'content': 0.13876160979270935, 'timestamp': '2025-10-01 04:21:48.965167', 'step': 4275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:49.019012', 'step': 4275, 'epoch': 1} {'type': 'loss', 'content': 0.16976861655712128, 'timestamp': '2025-10-01 04:21:49.025314', 'step': 4276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:49.079074', 'step': 4276, 'epoch': 1} {'type': 'loss', 'content': 0.09910253435373306, 'timestamp': '2025-10-01 04:21:49.081653', 'step': 4277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:49.135650', 'step': 4277, 'epoch': 1} {'type': 'loss', 'content': 0.10557462275028229, 'timestamp': '2025-10-01 04:21:49.140713', 'step': 4278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:49.199857', 'step': 4278, 'epoch': 1} {'type': 'loss', 'content': 0.17607331275939941, 'timestamp': '2025-10-01 04:21:49.208434', 'step': 4279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:49.269049', 'step': 4279, 'epoch': 1} {'type': 'loss', 'content': 0.2378852367401123, 'timestamp': '2025-10-01 04:21:49.274972', 'step': 4280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:49.331367', 'step': 4280, 'epoch': 1} {'type': 'loss', 'content': 0.12655076384544373, 'timestamp': '2025-10-01 04:21:49.340886', 'step': 4281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:49.397812', 'step': 4281, 'epoch': 1} {'type': 'loss', 'content': 0.09639205783605576, 'timestamp': '2025-10-01 04:21:49.400196', 'step': 4282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:49.453486', 'step': 4282, 'epoch': 1} {'type': 'loss', 'content': 0.1710689514875412, 'timestamp': '2025-10-01 04:21:49.458258', 'step': 4283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:49.519003', 'step': 4283, 'epoch': 1} {'type': 'loss', 'content': 0.20224374532699585, 'timestamp': '2025-10-01 04:21:49.526746', 'step': 4284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:49.580554', 'step': 4284, 'epoch': 1} {'type': 'loss', 'content': 0.1686745285987854, 'timestamp': '2025-10-01 04:21:49.583729', 'step': 4285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:49.639318', 'step': 4285, 'epoch': 1} {'type': 'loss', 'content': 0.19586025178432465, 'timestamp': '2025-10-01 04:21:49.641653', 'step': 4286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:21:49.695176', 'step': 4286, 'epoch': 1} {'type': 'loss', 'content': 0.18417052924633026, 'timestamp': '2025-10-01 04:21:49.702761', 'step': 4287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:49.755984', 'step': 4287, 'epoch': 1} {'type': 'loss', 'content': 0.16309544444084167, 'timestamp': '2025-10-01 04:21:49.761670', 'step': 4288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:49.826553', 'step': 4288, 'epoch': 1} {'type': 'loss', 'content': 0.13684378564357758, 'timestamp': '2025-10-01 04:21:49.829069', 'step': 4289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:49.882394', 'step': 4289, 'epoch': 1} {'type': 'loss', 'content': 0.24922838807106018, 'timestamp': '2025-10-01 04:21:49.884461', 'step': 4290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:49.938265', 'step': 4290, 'epoch': 1} {'type': 'loss', 'content': 0.13545940816402435, 'timestamp': '2025-10-01 04:21:49.940388', 'step': 4291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:49.993486', 'step': 4291, 'epoch': 1} {'type': 'loss', 'content': 0.14333415031433105, 'timestamp': '2025-10-01 04:21:50.001739', 'step': 4292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:50.058615', 'step': 4292, 'epoch': 1} {'type': 'loss', 'content': 0.2831304371356964, 'timestamp': '2025-10-01 04:21:50.060767', 'step': 4293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:50.113145', 'step': 4293, 'epoch': 1} {'type': 'loss', 'content': 0.11210524290800095, 'timestamp': '2025-10-01 04:21:50.115215', 'step': 4294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:21:50.168338', 'step': 4294, 'epoch': 1} {'type': 'loss', 'content': 0.13778163492679596, 'timestamp': '2025-10-01 04:21:50.173500', 'step': 4295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:50.239186', 'step': 4295, 'epoch': 1} {'type': 'loss', 'content': 0.16117213666439056, 'timestamp': '2025-10-01 04:21:50.245522', 'step': 4296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:50.302533', 'step': 4296, 'epoch': 1} {'type': 'loss', 'content': 0.19013381004333496, 'timestamp': '2025-10-01 04:21:50.304705', 'step': 4297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:50.359055', 'step': 4297, 'epoch': 1} {'type': 'loss', 'content': 0.12992092967033386, 'timestamp': '2025-10-01 04:21:50.362308', 'step': 4298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:50.417772', 'step': 4298, 'epoch': 1} {'type': 'loss', 'content': 0.26444244384765625, 'timestamp': '2025-10-01 04:21:50.421485', 'step': 4299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:50.479438', 'step': 4299, 'epoch': 1} {'type': 'loss', 'content': 0.17534658312797546, 'timestamp': '2025-10-01 04:21:50.489358', 'step': 4300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:21:50.542950', 'step': 4300, 'epoch': 1} {'type': 'loss', 'content': 0.19959919154644012, 'timestamp': '2025-10-01 04:21:50.545050', 'step': 4301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:21:50.598912', 'step': 4301, 'epoch': 1} {'type': 'loss', 'content': 0.11852477490901947, 'timestamp': '2025-10-01 04:21:50.601019', 'step': 4302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:50.655720', 'step': 4302, 'epoch': 1} {'type': 'loss', 'content': 0.1698157638311386, 'timestamp': '2025-10-01 04:21:50.657689', 'step': 4303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:50.711540', 'step': 4303, 'epoch': 1} {'type': 'loss', 'content': 0.1291898787021637, 'timestamp': '2025-10-01 04:21:50.717132', 'step': 4304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:50.770229', 'step': 4304, 'epoch': 1} {'type': 'loss', 'content': 0.17711077630519867, 'timestamp': '2025-10-01 04:21:50.772535', 'step': 4305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:50.827470', 'step': 4305, 'epoch': 1} {'type': 'loss', 'content': 0.103810153901577, 'timestamp': '2025-10-01 04:21:50.829990', 'step': 4306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:21:50.885685', 'step': 4306, 'epoch': 1} {'type': 'loss', 'content': 0.12624436616897583, 'timestamp': '2025-10-01 04:21:50.887585', 'step': 4307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:21:50.941562', 'step': 4307, 'epoch': 1} {'type': 'loss', 'content': 0.14896799623966217, 'timestamp': '2025-10-01 04:21:50.947081', 'step': 4308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:51.000101', 'step': 4308, 'epoch': 1} {'type': 'loss', 'content': 0.15706183016300201, 'timestamp': '2025-10-01 04:21:51.002323', 'step': 4309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:51.055949', 'step': 4309, 'epoch': 1} {'type': 'loss', 'content': 0.20916269719600677, 'timestamp': '2025-10-01 04:21:51.058040', 'step': 4310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:51.111178', 'step': 4310, 'epoch': 1} {'type': 'loss', 'content': 0.0755808874964714, 'timestamp': '2025-10-01 04:21:51.113223', 'step': 4311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:51.166752', 'step': 4311, 'epoch': 1} {'type': 'loss', 'content': 0.17324551939964294, 'timestamp': '2025-10-01 04:21:51.176067', 'step': 4312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:51.241203', 'step': 4312, 'epoch': 1} {'type': 'loss', 'content': 0.1358671337366104, 'timestamp': '2025-10-01 04:21:51.246924', 'step': 4313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:51.300710', 'step': 4313, 'epoch': 1} {'type': 'loss', 'content': 0.20182695984840393, 'timestamp': '2025-10-01 04:21:51.303506', 'step': 4314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:51.356834', 'step': 4314, 'epoch': 1} {'type': 'loss', 'content': 0.21363970637321472, 'timestamp': '2025-10-01 04:21:51.358879', 'step': 4315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:51.413436', 'step': 4315, 'epoch': 1} {'type': 'loss', 'content': 0.1237606480717659, 'timestamp': '2025-10-01 04:21:51.419138', 'step': 4316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:51.472407', 'step': 4316, 'epoch': 1} {'type': 'loss', 'content': 0.3020612895488739, 'timestamp': '2025-10-01 04:21:51.474295', 'step': 4317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:21:51.527914', 'step': 4317, 'epoch': 1} {'type': 'loss', 'content': 0.1305871307849884, 'timestamp': '2025-10-01 04:21:51.530057', 'step': 4318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:51.583485', 'step': 4318, 'epoch': 1} {'type': 'loss', 'content': 0.17289432883262634, 'timestamp': '2025-10-01 04:21:51.586189', 'step': 4319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:51.639806', 'step': 4319, 'epoch': 1} {'type': 'loss', 'content': 0.17476221919059753, 'timestamp': '2025-10-01 04:21:51.645323', 'step': 4320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:51.700823', 'step': 4320, 'epoch': 1} {'type': 'loss', 'content': 0.12698836624622345, 'timestamp': '2025-10-01 04:21:51.703587', 'step': 4321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:51.756837', 'step': 4321, 'epoch': 1} {'type': 'loss', 'content': 0.18442630767822266, 'timestamp': '2025-10-01 04:21:51.758854', 'step': 4322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:51.812726', 'step': 4322, 'epoch': 1} {'type': 'loss', 'content': 0.21269890666007996, 'timestamp': '2025-10-01 04:21:51.814584', 'step': 4323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:21:51.868787', 'step': 4323, 'epoch': 1} {'type': 'loss', 'content': 0.19522744417190552, 'timestamp': '2025-10-01 04:21:51.874499', 'step': 4324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:51.928467', 'step': 4324, 'epoch': 1} {'type': 'loss', 'content': 0.13184574246406555, 'timestamp': '2025-10-01 04:21:51.930335', 'step': 4325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:51.983936', 'step': 4325, 'epoch': 1} {'type': 'loss', 'content': 0.16236184537410736, 'timestamp': '2025-10-01 04:21:51.985909', 'step': 4326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:52.039309', 'step': 4326, 'epoch': 1} {'type': 'loss', 'content': 0.3121357560157776, 'timestamp': '2025-10-01 04:21:52.041422', 'step': 4327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:52.095366', 'step': 4327, 'epoch': 1} {'type': 'loss', 'content': 0.1692674160003662, 'timestamp': '2025-10-01 04:21:52.100980', 'step': 4328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:52.154256', 'step': 4328, 'epoch': 1} {'type': 'loss', 'content': 0.1604650914669037, 'timestamp': '2025-10-01 04:21:52.156532', 'step': 4329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:52.241703', 'step': 4329, 'epoch': 1} {'type': 'loss', 'content': 0.220183864235878, 'timestamp': '2025-10-01 04:21:52.243820', 'step': 4330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:52.299242', 'step': 4330, 'epoch': 1} {'type': 'loss', 'content': 0.19174319505691528, 'timestamp': '2025-10-01 04:21:52.301276', 'step': 4331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:52.354891', 'step': 4331, 'epoch': 1} {'type': 'loss', 'content': 0.09239638596773148, 'timestamp': '2025-10-01 04:21:52.360462', 'step': 4332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:52.417996', 'step': 4332, 'epoch': 1} {'type': 'loss', 'content': 0.11816877871751785, 'timestamp': '2025-10-01 04:21:52.419898', 'step': 4333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:52.473119', 'step': 4333, 'epoch': 1} {'type': 'loss', 'content': 0.09585416316986084, 'timestamp': '2025-10-01 04:21:52.475167', 'step': 4334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:52.529045', 'step': 4334, 'epoch': 1} {'type': 'loss', 'content': 0.11148399114608765, 'timestamp': '2025-10-01 04:21:52.531934', 'step': 4335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:52.587741', 'step': 4335, 'epoch': 1} {'type': 'loss', 'content': 0.17981359362602234, 'timestamp': '2025-10-01 04:21:52.593962', 'step': 4336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:52.647984', 'step': 4336, 'epoch': 1} {'type': 'loss', 'content': 0.13329783082008362, 'timestamp': '2025-10-01 04:21:52.650259', 'step': 4337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:52.721628', 'step': 4337, 'epoch': 1} {'type': 'loss', 'content': 0.22781158983707428, 'timestamp': '2025-10-01 04:21:52.723905', 'step': 4338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:52.777784', 'step': 4338, 'epoch': 1} {'type': 'loss', 'content': 0.18138672411441803, 'timestamp': '2025-10-01 04:21:52.780186', 'step': 4339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:52.845628', 'step': 4339, 'epoch': 1} {'type': 'loss', 'content': 0.13162904977798462, 'timestamp': '2025-10-01 04:21:52.851433', 'step': 4340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:52.904100', 'step': 4340, 'epoch': 1} {'type': 'loss', 'content': 0.1432705521583557, 'timestamp': '2025-10-01 04:21:52.906547', 'step': 4341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:52.960850', 'step': 4341, 'epoch': 1} {'type': 'loss', 'content': 0.1662861406803131, 'timestamp': '2025-10-01 04:21:52.963299', 'step': 4342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:53.016779', 'step': 4342, 'epoch': 1} {'type': 'loss', 'content': 0.18714019656181335, 'timestamp': '2025-10-01 04:21:53.018900', 'step': 4343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:53.072415', 'step': 4343, 'epoch': 1} {'type': 'loss', 'content': 0.1731569766998291, 'timestamp': '2025-10-01 04:21:53.078137', 'step': 4344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:53.132590', 'step': 4344, 'epoch': 1} {'type': 'loss', 'content': 0.10068387538194656, 'timestamp': '2025-10-01 04:21:53.134719', 'step': 4345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:53.188500', 'step': 4345, 'epoch': 1} {'type': 'loss', 'content': 0.12952180206775665, 'timestamp': '2025-10-01 04:21:53.190568', 'step': 4346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:53.243483', 'step': 4346, 'epoch': 1} {'type': 'loss', 'content': 0.18070381879806519, 'timestamp': '2025-10-01 04:21:53.245699', 'step': 4347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:53.299134', 'step': 4347, 'epoch': 1} {'type': 'loss', 'content': 0.22429528832435608, 'timestamp': '2025-10-01 04:21:53.304838', 'step': 4348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:53.357214', 'step': 4348, 'epoch': 1} {'type': 'loss', 'content': 0.16470712423324585, 'timestamp': '2025-10-01 04:21:53.367430', 'step': 4349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:53.420731', 'step': 4349, 'epoch': 1} {'type': 'loss', 'content': 0.218362957239151, 'timestamp': '2025-10-01 04:21:53.422966', 'step': 4350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:53.475769', 'step': 4350, 'epoch': 1} {'type': 'loss', 'content': 0.13307973742485046, 'timestamp': '2025-10-01 04:21:53.477929', 'step': 4351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:53.531594', 'step': 4351, 'epoch': 1} {'type': 'loss', 'content': 0.19034811854362488, 'timestamp': '2025-10-01 04:21:53.537079', 'step': 4352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:53.589770', 'step': 4352, 'epoch': 1} {'type': 'loss', 'content': 0.12845759093761444, 'timestamp': '2025-10-01 04:21:53.592041', 'step': 4353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:53.645285', 'step': 4353, 'epoch': 1} {'type': 'loss', 'content': 0.19426500797271729, 'timestamp': '2025-10-01 04:21:53.647398', 'step': 4354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:53.700417', 'step': 4354, 'epoch': 1} {'type': 'loss', 'content': 0.21695943176746368, 'timestamp': '2025-10-01 04:21:53.702968', 'step': 4355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:53.755485', 'step': 4355, 'epoch': 1} {'type': 'loss', 'content': 0.0794670432806015, 'timestamp': '2025-10-01 04:21:53.761897', 'step': 4356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:53.817449', 'step': 4356, 'epoch': 1} {'type': 'loss', 'content': 0.2545014023780823, 'timestamp': '2025-10-01 04:21:53.820323', 'step': 4357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:53.876249', 'step': 4357, 'epoch': 1} {'type': 'loss', 'content': 0.18200825154781342, 'timestamp': '2025-10-01 04:21:53.878469', 'step': 4358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:53.932027', 'step': 4358, 'epoch': 1} {'type': 'loss', 'content': 0.2109372466802597, 'timestamp': '2025-10-01 04:21:53.934964', 'step': 4359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:53.989358', 'step': 4359, 'epoch': 1} {'type': 'loss', 'content': 0.05806780606508255, 'timestamp': '2025-10-01 04:21:53.995020', 'step': 4360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:54.049177', 'step': 4360, 'epoch': 1} {'type': 'loss', 'content': 0.13115859031677246, 'timestamp': '2025-10-01 04:21:54.051107', 'step': 4361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:54.104196', 'step': 4361, 'epoch': 1} {'type': 'loss', 'content': 0.25082436203956604, 'timestamp': '2025-10-01 04:21:54.106785', 'step': 4362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:54.160733', 'step': 4362, 'epoch': 1} {'type': 'loss', 'content': 0.11971047520637512, 'timestamp': '2025-10-01 04:21:54.162927', 'step': 4363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:21:54.216336', 'step': 4363, 'epoch': 1} {'type': 'loss', 'content': 0.18145497143268585, 'timestamp': '2025-10-01 04:21:54.221818', 'step': 4364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:54.274281', 'step': 4364, 'epoch': 1} {'type': 'loss', 'content': 0.15016654133796692, 'timestamp': '2025-10-01 04:21:54.276459', 'step': 4365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:54.330310', 'step': 4365, 'epoch': 1} {'type': 'loss', 'content': 0.1693805754184723, 'timestamp': '2025-10-01 04:21:54.332626', 'step': 4366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:54.386754', 'step': 4366, 'epoch': 1} {'type': 'loss', 'content': 0.17183387279510498, 'timestamp': '2025-10-01 04:21:54.388873', 'step': 4367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:54.442343', 'step': 4367, 'epoch': 1} {'type': 'loss', 'content': 0.301984041929245, 'timestamp': '2025-10-01 04:21:54.447919', 'step': 4368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:54.500405', 'step': 4368, 'epoch': 1} {'type': 'loss', 'content': 0.13990937173366547, 'timestamp': '2025-10-01 04:21:54.502595', 'step': 4369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:54.555687', 'step': 4369, 'epoch': 1} {'type': 'loss', 'content': 0.3141396939754486, 'timestamp': '2025-10-01 04:21:54.558271', 'step': 4370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:54.631021', 'step': 4370, 'epoch': 1} {'type': 'loss', 'content': 0.18446217477321625, 'timestamp': '2025-10-01 04:21:54.633241', 'step': 4371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:54.686405', 'step': 4371, 'epoch': 1} {'type': 'loss', 'content': 0.2423495054244995, 'timestamp': '2025-10-01 04:21:54.692152', 'step': 4372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:54.745747', 'step': 4372, 'epoch': 1} {'type': 'loss', 'content': 0.1544368863105774, 'timestamp': '2025-10-01 04:21:54.748130', 'step': 4373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:54.801255', 'step': 4373, 'epoch': 1} {'type': 'loss', 'content': 0.26163166761398315, 'timestamp': '2025-10-01 04:21:54.803422', 'step': 4374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:54.859697', 'step': 4374, 'epoch': 1} {'type': 'loss', 'content': 0.2749277651309967, 'timestamp': '2025-10-01 04:21:54.861729', 'step': 4375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:54.915707', 'step': 4375, 'epoch': 1} {'type': 'loss', 'content': 0.1584153026342392, 'timestamp': '2025-10-01 04:21:54.921435', 'step': 4376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:21:54.974550', 'step': 4376, 'epoch': 1} {'type': 'loss', 'content': 0.097234345972538, 'timestamp': '2025-10-01 04:21:54.976596', 'step': 4377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:55.037232', 'step': 4377, 'epoch': 1} {'type': 'loss', 'content': 0.12464451044797897, 'timestamp': '2025-10-01 04:21:55.039208', 'step': 4378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:55.091984', 'step': 4378, 'epoch': 1} {'type': 'loss', 'content': 0.20330661535263062, 'timestamp': '2025-10-01 04:21:55.094259', 'step': 4379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:55.146997', 'step': 4379, 'epoch': 1} {'type': 'loss', 'content': 0.12145164608955383, 'timestamp': '2025-10-01 04:21:55.156212', 'step': 4380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:55.213644', 'step': 4380, 'epoch': 1} {'type': 'loss', 'content': 0.19045709073543549, 'timestamp': '2025-10-01 04:21:55.217936', 'step': 4381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:55.272155', 'step': 4381, 'epoch': 1} {'type': 'loss', 'content': 0.2666904628276825, 'timestamp': '2025-10-01 04:21:55.282811', 'step': 4382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:55.336801', 'step': 4382, 'epoch': 1} {'type': 'loss', 'content': 0.1384078562259674, 'timestamp': '2025-10-01 04:21:55.338955', 'step': 4383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:55.392368', 'step': 4383, 'epoch': 1} {'type': 'loss', 'content': 0.12388402223587036, 'timestamp': '2025-10-01 04:21:55.397707', 'step': 4384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:55.450578', 'step': 4384, 'epoch': 1} {'type': 'loss', 'content': 0.1794874221086502, 'timestamp': '2025-10-01 04:21:55.454241', 'step': 4385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:55.507709', 'step': 4385, 'epoch': 1} {'type': 'loss', 'content': 0.195126473903656, 'timestamp': '2025-10-01 04:21:55.509945', 'step': 4386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:55.564506', 'step': 4386, 'epoch': 1} {'type': 'loss', 'content': 0.20651936531066895, 'timestamp': '2025-10-01 04:21:55.566847', 'step': 4387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:55.620870', 'step': 4387, 'epoch': 1} {'type': 'loss', 'content': 0.10234479606151581, 'timestamp': '2025-10-01 04:21:55.627951', 'step': 4388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:55.680508', 'step': 4388, 'epoch': 1} {'type': 'loss', 'content': 0.16218708455562592, 'timestamp': '2025-10-01 04:21:55.689102', 'step': 4389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:55.765383', 'step': 4389, 'epoch': 1} {'type': 'loss', 'content': 0.16259053349494934, 'timestamp': '2025-10-01 04:21:55.767254', 'step': 4390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:55.820297', 'step': 4390, 'epoch': 1} {'type': 'loss', 'content': 0.14681792259216309, 'timestamp': '2025-10-01 04:21:55.823770', 'step': 4391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:55.877042', 'step': 4391, 'epoch': 1} {'type': 'loss', 'content': 0.18963901698589325, 'timestamp': '2025-10-01 04:21:55.882681', 'step': 4392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:55.935715', 'step': 4392, 'epoch': 1} {'type': 'loss', 'content': 0.1283896416425705, 'timestamp': '2025-10-01 04:21:55.949236', 'step': 4393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:56.001990', 'step': 4393, 'epoch': 1} {'type': 'loss', 'content': 0.17098145186901093, 'timestamp': '2025-10-01 04:21:56.004093', 'step': 4394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:56.059848', 'step': 4394, 'epoch': 1} {'type': 'loss', 'content': 0.14795513451099396, 'timestamp': '2025-10-01 04:21:56.061745', 'step': 4395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:56.114705', 'step': 4395, 'epoch': 1} {'type': 'loss', 'content': 0.2318820357322693, 'timestamp': '2025-10-01 04:21:56.121018', 'step': 4396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:56.172860', 'step': 4396, 'epoch': 1} {'type': 'loss', 'content': 0.16514021158218384, 'timestamp': '2025-10-01 04:21:56.175009', 'step': 4397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:56.228001', 'step': 4397, 'epoch': 1} {'type': 'loss', 'content': 0.164878249168396, 'timestamp': '2025-10-01 04:21:56.230104', 'step': 4398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:56.289620', 'step': 4398, 'epoch': 1} {'type': 'loss', 'content': 0.1354876309633255, 'timestamp': '2025-10-01 04:21:56.292247', 'step': 4399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:56.348003', 'step': 4399, 'epoch': 1} {'type': 'loss', 'content': 0.1742686927318573, 'timestamp': '2025-10-01 04:21:56.354368', 'step': 4400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:56.409926', 'step': 4400, 'epoch': 1} {'type': 'loss', 'content': 0.12272577732801437, 'timestamp': '2025-10-01 04:21:56.412081', 'step': 4401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:56.464993', 'step': 4401, 'epoch': 1} {'type': 'loss', 'content': 0.264165461063385, 'timestamp': '2025-10-01 04:21:56.467119', 'step': 4402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:21:56.520116', 'step': 4402, 'epoch': 1} {'type': 'loss', 'content': 0.2339942306280136, 'timestamp': '2025-10-01 04:21:56.522209', 'step': 4403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:56.575758', 'step': 4403, 'epoch': 1} {'type': 'loss', 'content': 0.1943235844373703, 'timestamp': '2025-10-01 04:21:56.581448', 'step': 4404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:56.634116', 'step': 4404, 'epoch': 1} {'type': 'loss', 'content': 0.09549082070589066, 'timestamp': '2025-10-01 04:21:56.636229', 'step': 4405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:56.688924', 'step': 4405, 'epoch': 1} {'type': 'loss', 'content': 0.15732881426811218, 'timestamp': '2025-10-01 04:21:56.690977', 'step': 4406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:56.744427', 'step': 4406, 'epoch': 1} {'type': 'loss', 'content': 0.10632039606571198, 'timestamp': '2025-10-01 04:21:56.746596', 'step': 4407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:56.799683', 'step': 4407, 'epoch': 1} {'type': 'loss', 'content': 0.15504954755306244, 'timestamp': '2025-10-01 04:21:56.805405', 'step': 4408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:56.859598', 'step': 4408, 'epoch': 1} {'type': 'loss', 'content': 0.1260560303926468, 'timestamp': '2025-10-01 04:21:56.861775', 'step': 4409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:56.917562', 'step': 4409, 'epoch': 1} {'type': 'loss', 'content': 0.17298100888729095, 'timestamp': '2025-10-01 04:21:56.919799', 'step': 4410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:21:56.972725', 'step': 4410, 'epoch': 1} {'type': 'loss', 'content': 0.20856723189353943, 'timestamp': '2025-10-01 04:21:56.974696', 'step': 4411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:21:57.027941', 'step': 4411, 'epoch': 1} {'type': 'loss', 'content': 0.13885551691055298, 'timestamp': '2025-10-01 04:21:57.033376', 'step': 4412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:21:57.085476', 'step': 4412, 'epoch': 1} {'type': 'loss', 'content': 0.15606054663658142, 'timestamp': '2025-10-01 04:21:57.087619', 'step': 4413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:57.140501', 'step': 4413, 'epoch': 1} {'type': 'loss', 'content': 0.22708004713058472, 'timestamp': '2025-10-01 04:21:57.142689', 'step': 4414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:57.195874', 'step': 4414, 'epoch': 1} {'type': 'loss', 'content': 0.09229490906000137, 'timestamp': '2025-10-01 04:21:57.198523', 'step': 4415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:57.251901', 'step': 4415, 'epoch': 1} {'type': 'loss', 'content': 0.17374564707279205, 'timestamp': '2025-10-01 04:21:57.257813', 'step': 4416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:57.310626', 'step': 4416, 'epoch': 1} {'type': 'loss', 'content': 0.1290501356124878, 'timestamp': '2025-10-01 04:21:57.313071', 'step': 4417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:57.367564', 'step': 4417, 'epoch': 1} {'type': 'loss', 'content': 0.17406252026557922, 'timestamp': '2025-10-01 04:21:57.370055', 'step': 4418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:57.424667', 'step': 4418, 'epoch': 1} {'type': 'loss', 'content': 0.14179101586341858, 'timestamp': '2025-10-01 04:21:57.427495', 'step': 4419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:57.481949', 'step': 4419, 'epoch': 1} {'type': 'loss', 'content': 0.19203196465969086, 'timestamp': '2025-10-01 04:21:57.487938', 'step': 4420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:57.541849', 'step': 4420, 'epoch': 1} {'type': 'loss', 'content': 0.13443569839000702, 'timestamp': '2025-10-01 04:21:57.544376', 'step': 4421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:21:57.599242', 'step': 4421, 'epoch': 1} {'type': 'loss', 'content': 0.20091751217842102, 'timestamp': '2025-10-01 04:21:57.601385', 'step': 4422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:57.655004', 'step': 4422, 'epoch': 1} {'type': 'loss', 'content': 0.2249976098537445, 'timestamp': '2025-10-01 04:21:57.657609', 'step': 4423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:57.711631', 'step': 4423, 'epoch': 1} {'type': 'loss', 'content': 0.1448335498571396, 'timestamp': '2025-10-01 04:21:57.717187', 'step': 4424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:57.769899', 'step': 4424, 'epoch': 1} {'type': 'loss', 'content': 0.19545216858386993, 'timestamp': '2025-10-01 04:21:57.771990', 'step': 4425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:57.838260', 'step': 4425, 'epoch': 1} {'type': 'loss', 'content': 0.2043948620557785, 'timestamp': '2025-10-01 04:21:57.840588', 'step': 4426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:57.895248', 'step': 4426, 'epoch': 1} {'type': 'loss', 'content': 0.2378675490617752, 'timestamp': '2025-10-01 04:21:57.897605', 'step': 4427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:57.950110', 'step': 4427, 'epoch': 1} {'type': 'loss', 'content': 0.09226173907518387, 'timestamp': '2025-10-01 04:21:57.955783', 'step': 4428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:58.008013', 'step': 4428, 'epoch': 1} {'type': 'loss', 'content': 0.21290040016174316, 'timestamp': '2025-10-01 04:21:58.010120', 'step': 4429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:58.073986', 'step': 4429, 'epoch': 1} {'type': 'loss', 'content': 0.1351650357246399, 'timestamp': '2025-10-01 04:21:58.076287', 'step': 4430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:58.130456', 'step': 4430, 'epoch': 1} {'type': 'loss', 'content': 0.17877750098705292, 'timestamp': '2025-10-01 04:21:58.132628', 'step': 4431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:58.186466', 'step': 4431, 'epoch': 1} {'type': 'loss', 'content': 0.24174456298351288, 'timestamp': '2025-10-01 04:21:58.192170', 'step': 4432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:58.244914', 'step': 4432, 'epoch': 1} {'type': 'loss', 'content': 0.19761840999126434, 'timestamp': '2025-10-01 04:21:58.247169', 'step': 4433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:58.300696', 'step': 4433, 'epoch': 1} {'type': 'loss', 'content': 0.198664590716362, 'timestamp': '2025-10-01 04:21:58.303416', 'step': 4434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:58.356624', 'step': 4434, 'epoch': 1} {'type': 'loss', 'content': 0.2752503454685211, 'timestamp': '2025-10-01 04:21:58.358717', 'step': 4435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:58.416775', 'step': 4435, 'epoch': 1} {'type': 'loss', 'content': 0.08589991927146912, 'timestamp': '2025-10-01 04:21:58.423352', 'step': 4436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:58.477654', 'step': 4436, 'epoch': 1} {'type': 'loss', 'content': 0.2455504685640335, 'timestamp': '2025-10-01 04:21:58.479733', 'step': 4437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:58.535396', 'step': 4437, 'epoch': 1} {'type': 'loss', 'content': 0.12070557475090027, 'timestamp': '2025-10-01 04:21:58.537551', 'step': 4438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:58.590558', 'step': 4438, 'epoch': 1} {'type': 'loss', 'content': 0.16315223276615143, 'timestamp': '2025-10-01 04:21:58.592587', 'step': 4439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:58.645958', 'step': 4439, 'epoch': 1} {'type': 'loss', 'content': 0.14926080405712128, 'timestamp': '2025-10-01 04:21:58.654606', 'step': 4440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:21:58.708282', 'step': 4440, 'epoch': 1} {'type': 'loss', 'content': 0.37946343421936035, 'timestamp': '2025-10-01 04:21:58.710534', 'step': 4441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:58.764524', 'step': 4441, 'epoch': 1} {'type': 'loss', 'content': 0.15428867936134338, 'timestamp': '2025-10-01 04:21:58.766536', 'step': 4442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:58.820119', 'step': 4442, 'epoch': 1} {'type': 'loss', 'content': 0.1867058128118515, 'timestamp': '2025-10-01 04:21:58.824676', 'step': 4443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:58.877611', 'step': 4443, 'epoch': 1} {'type': 'loss', 'content': 0.23085996508598328, 'timestamp': '2025-10-01 04:21:58.883219', 'step': 4444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:58.936896', 'step': 4444, 'epoch': 1} {'type': 'loss', 'content': 0.15618276596069336, 'timestamp': '2025-10-01 04:21:58.939412', 'step': 4445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:58.993116', 'step': 4445, 'epoch': 1} {'type': 'loss', 'content': 0.1268477737903595, 'timestamp': '2025-10-01 04:21:58.995241', 'step': 4446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:59.048540', 'step': 4446, 'epoch': 1} {'type': 'loss', 'content': 0.1059594377875328, 'timestamp': '2025-10-01 04:21:59.050784', 'step': 4447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:59.104678', 'step': 4447, 'epoch': 1} {'type': 'loss', 'content': 0.21066346764564514, 'timestamp': '2025-10-01 04:21:59.110177', 'step': 4448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:59.162288', 'step': 4448, 'epoch': 1} {'type': 'loss', 'content': 0.10870586335659027, 'timestamp': '2025-10-01 04:21:59.164571', 'step': 4449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:21:59.221543', 'step': 4449, 'epoch': 1} {'type': 'loss', 'content': 0.2828463017940521, 'timestamp': '2025-10-01 04:21:59.223836', 'step': 4450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:59.276699', 'step': 4450, 'epoch': 1} {'type': 'loss', 'content': 0.20429165661334991, 'timestamp': '2025-10-01 04:21:59.278747', 'step': 4451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:21:59.332059', 'step': 4451, 'epoch': 1} {'type': 'loss', 'content': 0.14852070808410645, 'timestamp': '2025-10-01 04:21:59.337577', 'step': 4452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:59.389989', 'step': 4452, 'epoch': 1} {'type': 'loss', 'content': 0.14175815880298615, 'timestamp': '2025-10-01 04:21:59.391845', 'step': 4453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:59.444788', 'step': 4453, 'epoch': 1} {'type': 'loss', 'content': 0.14262841641902924, 'timestamp': '2025-10-01 04:21:59.446937', 'step': 4454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:21:59.500074', 'step': 4454, 'epoch': 1} {'type': 'loss', 'content': 0.27196356654167175, 'timestamp': '2025-10-01 04:21:59.502089', 'step': 4455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:59.558493', 'step': 4455, 'epoch': 1} {'type': 'loss', 'content': 0.12361206114292145, 'timestamp': '2025-10-01 04:21:59.564310', 'step': 4456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:59.618157', 'step': 4456, 'epoch': 1} {'type': 'loss', 'content': 0.09912135452032089, 'timestamp': '2025-10-01 04:21:59.620003', 'step': 4457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:59.676230', 'step': 4457, 'epoch': 1} {'type': 'loss', 'content': 0.19324572384357452, 'timestamp': '2025-10-01 04:21:59.678364', 'step': 4458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:21:59.731752', 'step': 4458, 'epoch': 1} {'type': 'loss', 'content': 0.09269309043884277, 'timestamp': '2025-10-01 04:21:59.733865', 'step': 4459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:59.786749', 'step': 4459, 'epoch': 1} {'type': 'loss', 'content': 0.19473044574260712, 'timestamp': '2025-10-01 04:21:59.794952', 'step': 4460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:59.847922', 'step': 4460, 'epoch': 1} {'type': 'loss', 'content': 0.1631651669740677, 'timestamp': '2025-10-01 04:21:59.850830', 'step': 4461, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:59.903459', 'step': 4461, 'epoch': 1} {'type': 'loss', 'content': 0.08813876658678055, 'timestamp': '2025-10-01 04:21:59.905465', 'step': 4462, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:21:59.971695', 'step': 4462, 'epoch': 1} {'type': 'loss', 'content': 0.17829366028308868, 'timestamp': '2025-10-01 04:21:59.973716', 'step': 4463, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:00.027380', 'step': 4463, 'epoch': 1} {'type': 'loss', 'content': 0.21913234889507294, 'timestamp': '2025-10-01 04:22:00.035181', 'step': 4464, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:00.091730', 'step': 4464, 'epoch': 1} {'type': 'loss', 'content': 0.1514972299337387, 'timestamp': '2025-10-01 04:22:00.093798', 'step': 4465, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:00.148388', 'step': 4465, 'epoch': 1} {'type': 'loss', 'content': 0.1967335045337677, 'timestamp': '2025-10-01 04:22:00.150727', 'step': 4466, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:00.205361', 'step': 4466, 'epoch': 1} {'type': 'loss', 'content': 0.16493603587150574, 'timestamp': '2025-10-01 04:22:00.207401', 'step': 4467, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-01 04:22:00.269489', 'step': 4467, 'epoch': 1} {'type': 'loss', 'content': 0.14392909407615662, 'timestamp': '2025-10-01 04:22:00.275587', 'step': 4468, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:00.329680', 'step': 4468, 'epoch': 1} {'type': 'loss', 'content': 0.13005264103412628, 'timestamp': '2025-10-01 04:22:00.334586', 'step': 4469, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:00.388697', 'step': 4469, 'epoch': 1} {'type': 'loss', 'content': 0.13725046813488007, 'timestamp': '2025-10-01 04:22:00.390580', 'step': 4470, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:00.444348', 'step': 4470, 'epoch': 1} {'type': 'loss', 'content': 0.2425047606229782, 'timestamp': '2025-10-01 04:22:00.447401', 'step': 4471, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:00.501225', 'step': 4471, 'epoch': 1} {'type': 'loss', 'content': 0.12029153853654861, 'timestamp': '2025-10-01 04:22:00.508903', 'step': 4472, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:00.563451', 'step': 4472, 'epoch': 1} {'type': 'loss', 'content': 0.21893152594566345, 'timestamp': '2025-10-01 04:22:00.565649', 'step': 4473, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:22:00.618870', 'step': 4473, 'epoch': 1} {'type': 'loss', 'content': 0.07853373885154724, 'timestamp': '2025-10-01 04:22:00.621437', 'step': 4474, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:00.675057', 'step': 4474, 'epoch': 1} {'type': 'loss', 'content': 0.21026746928691864, 'timestamp': '2025-10-01 04:22:00.677177', 'step': 4475, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:00.730189', 'step': 4475, 'epoch': 1} {'type': 'loss', 'content': 0.18556514382362366, 'timestamp': '2025-10-01 04:22:00.735995', 'step': 4476, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:00.793946', 'step': 4476, 'epoch': 1} {'type': 'loss', 'content': 0.12209425866603851, 'timestamp': '2025-10-01 04:22:00.795863', 'step': 4477, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:00.848522', 'step': 4477, 'epoch': 1} {'type': 'loss', 'content': 0.12644197046756744, 'timestamp': '2025-10-01 04:22:00.850423', 'step': 4478, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:00.903849', 'step': 4478, 'epoch': 1} {'type': 'loss', 'content': 0.08434449881315231, 'timestamp': '2025-10-01 04:22:00.905905', 'step': 4479, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:00.958780', 'step': 4479, 'epoch': 1} {'type': 'loss', 'content': 0.18663020431995392, 'timestamp': '2025-10-01 04:22:00.964458', 'step': 4480, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:01.017000', 'step': 4480, 'epoch': 1} {'type': 'loss', 'content': 0.19797289371490479, 'timestamp': '2025-10-01 04:22:01.019131', 'step': 4481, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:01.072205', 'step': 4481, 'epoch': 1} {'type': 'loss', 'content': 0.1884191930294037, 'timestamp': '2025-10-01 04:22:01.074387', 'step': 4482, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:01.132123', 'step': 4482, 'epoch': 1} {'type': 'loss', 'content': 0.12056706100702286, 'timestamp': '2025-10-01 04:22:01.134203', 'step': 4483, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:01.188535', 'step': 4483, 'epoch': 1} {'type': 'loss', 'content': 0.14852416515350342, 'timestamp': '2025-10-01 04:22:01.194542', 'step': 4484, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:01.247836', 'step': 4484, 'epoch': 1} {'type': 'loss', 'content': 0.09919780492782593, 'timestamp': '2025-10-01 04:22:01.249988', 'step': 4485, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:01.303138', 'step': 4485, 'epoch': 1} {'type': 'loss', 'content': 0.23567776381969452, 'timestamp': '2025-10-01 04:22:01.305001', 'step': 4486, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:01.358533', 'step': 4486, 'epoch': 1} {'type': 'loss', 'content': 0.17568954825401306, 'timestamp': '2025-10-01 04:22:01.360820', 'step': 4487, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:01.414272', 'step': 4487, 'epoch': 1} {'type': 'loss', 'content': 0.10842227935791016, 'timestamp': '2025-10-01 04:22:01.420133', 'step': 4488, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:01.474241', 'step': 4488, 'epoch': 1} {'type': 'loss', 'content': 0.16019561886787415, 'timestamp': '2025-10-01 04:22:01.476416', 'step': 4489, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:22:01.532018', 'step': 4489, 'epoch': 1} {'type': 'loss', 'content': 0.10286043584346771, 'timestamp': '2025-10-01 04:22:01.534284', 'step': 4490, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:01.591564', 'step': 4490, 'epoch': 1} {'type': 'loss', 'content': 0.1767844557762146, 'timestamp': '2025-10-01 04:22:01.595156', 'step': 4491, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:01.653040', 'step': 4491, 'epoch': 1} {'type': 'loss', 'content': 0.1889306604862213, 'timestamp': '2025-10-01 04:22:01.659040', 'step': 4492, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:01.714500', 'step': 4492, 'epoch': 1} {'type': 'loss', 'content': 0.1476019322872162, 'timestamp': '2025-10-01 04:22:01.716898', 'step': 4493, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:01.771764', 'step': 4493, 'epoch': 1} {'type': 'loss', 'content': 0.11188330501317978, 'timestamp': '2025-10-01 04:22:01.774210', 'step': 4494, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:01.829688', 'step': 4494, 'epoch': 1} {'type': 'loss', 'content': 0.12323299795389175, 'timestamp': '2025-10-01 04:22:01.832069', 'step': 4495, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:01.887135', 'step': 4495, 'epoch': 1} {'type': 'loss', 'content': 0.15296147763729095, 'timestamp': '2025-10-01 04:22:01.893412', 'step': 4496, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:01.946619', 'step': 4496, 'epoch': 1} {'type': 'loss', 'content': 0.29078513383865356, 'timestamp': '2025-10-01 04:22:01.949755', 'step': 4497, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:02.003696', 'step': 4497, 'epoch': 1} {'type': 'loss', 'content': 0.157331183552742, 'timestamp': '2025-10-01 04:22:02.005791', 'step': 4498, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:02.060737', 'step': 4498, 'epoch': 1} {'type': 'loss', 'content': 0.2345350682735443, 'timestamp': '2025-10-01 04:22:02.062832', 'step': 4499, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:02.117333', 'step': 4499, 'epoch': 1} {'type': 'loss', 'content': 0.12381082773208618, 'timestamp': '2025-10-01 04:22:02.123180', 'step': 4500, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 4500', 'timestamp': '2025-10-01 04:22:02.494997', 'step': 4500, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:02.551437', 'step': 4500, 'epoch': 1} {'type': 'loss', 'content': 0.13951632380485535, 'timestamp': '2025-10-01 04:22:02.553677', 'step': 4501, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:02.607652', 'step': 4501, 'epoch': 1} {'type': 'loss', 'content': 0.1051999032497406, 'timestamp': '2025-10-01 04:22:02.610059', 'step': 4502, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:02.663291', 'step': 4502, 'epoch': 1} {'type': 'loss', 'content': 0.2779252529144287, 'timestamp': '2025-10-01 04:22:02.665312', 'step': 4503, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:02.718222', 'step': 4503, 'epoch': 1} {'type': 'loss', 'content': 0.17653262615203857, 'timestamp': '2025-10-01 04:22:02.724000', 'step': 4504, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:02.777063', 'step': 4504, 'epoch': 1} {'type': 'loss', 'content': 0.27499672770500183, 'timestamp': '2025-10-01 04:22:02.779208', 'step': 4505, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:02.832266', 'step': 4505, 'epoch': 1} {'type': 'loss', 'content': 0.10746295005083084, 'timestamp': '2025-10-01 04:22:02.834535', 'step': 4506, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:02.887322', 'step': 4506, 'epoch': 1} {'type': 'loss', 'content': 0.2328845113515854, 'timestamp': '2025-10-01 04:22:02.889537', 'step': 4507, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:02.943693', 'step': 4507, 'epoch': 1} {'type': 'loss', 'content': 0.20264889299869537, 'timestamp': '2025-10-01 04:22:02.949534', 'step': 4508, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:03.008318', 'step': 4508, 'epoch': 1} {'type': 'loss', 'content': 0.15917308628559113, 'timestamp': '2025-10-01 04:22:03.010376', 'step': 4509, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:03.063599', 'step': 4509, 'epoch': 1} {'type': 'loss', 'content': 0.18309201300144196, 'timestamp': '2025-10-01 04:22:03.065817', 'step': 4510, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:03.119390', 'step': 4510, 'epoch': 1} {'type': 'loss', 'content': 0.16343574225902557, 'timestamp': '2025-10-01 04:22:03.121598', 'step': 4511, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:22:03.174408', 'step': 4511, 'epoch': 1} {'type': 'loss', 'content': 0.22721965610980988, 'timestamp': '2025-10-01 04:22:03.180622', 'step': 4512, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:03.233855', 'step': 4512, 'epoch': 1} {'type': 'loss', 'content': 0.19028596580028534, 'timestamp': '2025-10-01 04:22:03.236087', 'step': 4513, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:03.289268', 'step': 4513, 'epoch': 1} {'type': 'loss', 'content': 0.2682221829891205, 'timestamp': '2025-10-01 04:22:03.291422', 'step': 4514, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:03.347375', 'step': 4514, 'epoch': 1} {'type': 'loss', 'content': 0.1574847400188446, 'timestamp': '2025-10-01 04:22:03.349546', 'step': 4515, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:03.402393', 'step': 4515, 'epoch': 1} {'type': 'loss', 'content': 0.1441011279821396, 'timestamp': '2025-10-01 04:22:03.408399', 'step': 4516, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:03.461131', 'step': 4516, 'epoch': 1} {'type': 'loss', 'content': 0.1302834004163742, 'timestamp': '2025-10-01 04:22:03.463339', 'step': 4517, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:03.518187', 'step': 4517, 'epoch': 1} {'type': 'loss', 'content': 0.17559576034545898, 'timestamp': '2025-10-01 04:22:03.520234', 'step': 4518, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:03.573770', 'step': 4518, 'epoch': 1} {'type': 'loss', 'content': 0.15142472088336945, 'timestamp': '2025-10-01 04:22:03.585579', 'step': 4519, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:03.638442', 'step': 4519, 'epoch': 1} {'type': 'loss', 'content': 0.15804333984851837, 'timestamp': '2025-10-01 04:22:03.644105', 'step': 4520, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:03.699381', 'step': 4520, 'epoch': 1} {'type': 'loss', 'content': 0.19145804643630981, 'timestamp': '2025-10-01 04:22:03.702319', 'step': 4521, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:03.755693', 'step': 4521, 'epoch': 1} {'type': 'loss', 'content': 0.18388737738132477, 'timestamp': '2025-10-01 04:22:03.757724', 'step': 4522, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:03.811119', 'step': 4522, 'epoch': 1} {'type': 'loss', 'content': 0.12803201377391815, 'timestamp': '2025-10-01 04:22:03.813210', 'step': 4523, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:03.865969', 'step': 4523, 'epoch': 1} {'type': 'loss', 'content': 0.16123875975608826, 'timestamp': '2025-10-01 04:22:03.871687', 'step': 4524, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:22:03.924243', 'step': 4524, 'epoch': 1} {'type': 'loss', 'content': 0.12234128266572952, 'timestamp': '2025-10-01 04:22:03.926039', 'step': 4525, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:03.978585', 'step': 4525, 'epoch': 1} {'type': 'loss', 'content': 0.1740570217370987, 'timestamp': '2025-10-01 04:22:03.980660', 'step': 4526, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:04.034232', 'step': 4526, 'epoch': 1} {'type': 'loss', 'content': 0.16196955740451813, 'timestamp': '2025-10-01 04:22:04.037135', 'step': 4527, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:22:04.103675', 'step': 4527, 'epoch': 1} {'type': 'loss', 'content': 0.13892199099063873, 'timestamp': '2025-10-01 04:22:04.109295', 'step': 4528, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:04.161401', 'step': 4528, 'epoch': 1} {'type': 'loss', 'content': 0.19959881901741028, 'timestamp': '2025-10-01 04:22:04.163551', 'step': 4529, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:04.216343', 'step': 4529, 'epoch': 1} {'type': 'loss', 'content': 0.15876704454421997, 'timestamp': '2025-10-01 04:22:04.218538', 'step': 4530, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:04.271660', 'step': 4530, 'epoch': 1} {'type': 'loss', 'content': 0.1610839068889618, 'timestamp': '2025-10-01 04:22:04.273900', 'step': 4531, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:22:04.326886', 'step': 4531, 'epoch': 1} {'type': 'loss', 'content': 0.15648601949214935, 'timestamp': '2025-10-01 04:22:04.332515', 'step': 4532, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:04.385899', 'step': 4532, 'epoch': 1} {'type': 'loss', 'content': 0.1508069783449173, 'timestamp': '2025-10-01 04:22:04.388017', 'step': 4533, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:04.441355', 'step': 4533, 'epoch': 1} {'type': 'loss', 'content': 0.36285239458084106, 'timestamp': '2025-10-01 04:22:04.443478', 'step': 4534, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:04.497390', 'step': 4534, 'epoch': 1} {'type': 'loss', 'content': 0.17408572137355804, 'timestamp': '2025-10-01 04:22:04.499270', 'step': 4535, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:04.554990', 'step': 4535, 'epoch': 1} {'type': 'loss', 'content': 0.17110058665275574, 'timestamp': '2025-10-01 04:22:04.560418', 'step': 4536, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:04.614733', 'step': 4536, 'epoch': 1} {'type': 'loss', 'content': 0.19130443036556244, 'timestamp': '2025-10-01 04:22:04.616926', 'step': 4537, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:04.670417', 'step': 4537, 'epoch': 1} {'type': 'loss', 'content': 0.20005716383457184, 'timestamp': '2025-10-01 04:22:04.672421', 'step': 4538, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:04.725570', 'step': 4538, 'epoch': 1} {'type': 'loss', 'content': 0.13935674726963043, 'timestamp': '2025-10-01 04:22:04.727718', 'step': 4539, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:04.781258', 'step': 4539, 'epoch': 1} {'type': 'loss', 'content': 0.1743471473455429, 'timestamp': '2025-10-01 04:22:04.786944', 'step': 4540, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:04.838991', 'step': 4540, 'epoch': 1} {'type': 'loss', 'content': 0.20149429142475128, 'timestamp': '2025-10-01 04:22:04.841213', 'step': 4541, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:22:04.895864', 'step': 4541, 'epoch': 1} {'type': 'loss', 'content': 0.11884451657533646, 'timestamp': '2025-10-01 04:22:04.898466', 'step': 4542, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:04.952955', 'step': 4542, 'epoch': 1} {'type': 'loss', 'content': 0.2522972822189331, 'timestamp': '2025-10-01 04:22:04.955401', 'step': 4543, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:05.009398', 'step': 4543, 'epoch': 1} {'type': 'loss', 'content': 0.09880054742097855, 'timestamp': '2025-10-01 04:22:05.015441', 'step': 4544, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:05.071099', 'step': 4544, 'epoch': 1} {'type': 'loss', 'content': 0.26177990436553955, 'timestamp': '2025-10-01 04:22:05.073642', 'step': 4545, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:05.127563', 'step': 4545, 'epoch': 1} {'type': 'loss', 'content': 0.09606800973415375, 'timestamp': '2025-10-01 04:22:05.129698', 'step': 4546, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:05.183254', 'step': 4546, 'epoch': 1} {'type': 'loss', 'content': 0.13207809627056122, 'timestamp': '2025-10-01 04:22:05.185454', 'step': 4547, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:05.239370', 'step': 4547, 'epoch': 1} {'type': 'loss', 'content': 0.19530640542507172, 'timestamp': '2025-10-01 04:22:05.245597', 'step': 4548, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:05.299075', 'step': 4548, 'epoch': 1} {'type': 'loss', 'content': 0.14526957273483276, 'timestamp': '2025-10-01 04:22:05.301036', 'step': 4549, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:05.355001', 'step': 4549, 'epoch': 1} {'type': 'loss', 'content': 0.17134322226047516, 'timestamp': '2025-10-01 04:22:05.357818', 'step': 4550, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:05.412094', 'step': 4550, 'epoch': 1} {'type': 'loss', 'content': 0.05825438350439072, 'timestamp': '2025-10-01 04:22:05.415106', 'step': 4551, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:05.468693', 'step': 4551, 'epoch': 1} {'type': 'loss', 'content': 0.14108967781066895, 'timestamp': '2025-10-01 04:22:05.474307', 'step': 4552, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:05.527184', 'step': 4552, 'epoch': 1} {'type': 'loss', 'content': 0.13066820800304413, 'timestamp': '2025-10-01 04:22:05.529392', 'step': 4553, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:05.583303', 'step': 4553, 'epoch': 1} {'type': 'loss', 'content': 0.14452335238456726, 'timestamp': '2025-10-01 04:22:05.585820', 'step': 4554, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:05.640056', 'step': 4554, 'epoch': 1} {'type': 'loss', 'content': 0.19191370904445648, 'timestamp': '2025-10-01 04:22:05.642076', 'step': 4555, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:22:05.697681', 'step': 4555, 'epoch': 1} {'type': 'loss', 'content': 0.08658967912197113, 'timestamp': '2025-10-01 04:22:05.703530', 'step': 4556, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:05.757010', 'step': 4556, 'epoch': 1} {'type': 'loss', 'content': 0.1449500024318695, 'timestamp': '2025-10-01 04:22:05.759377', 'step': 4557, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:05.813875', 'step': 4557, 'epoch': 1} {'type': 'loss', 'content': 0.09509371966123581, 'timestamp': '2025-10-01 04:22:05.816206', 'step': 4558, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:05.870316', 'step': 4558, 'epoch': 1} {'type': 'loss', 'content': 0.11425144970417023, 'timestamp': '2025-10-01 04:22:05.872787', 'step': 4559, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:05.927309', 'step': 4559, 'epoch': 1} {'type': 'loss', 'content': 0.151297926902771, 'timestamp': '2025-10-01 04:22:05.933294', 'step': 4560, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:05.986781', 'step': 4560, 'epoch': 1} {'type': 'loss', 'content': 0.13225139677524567, 'timestamp': '2025-10-01 04:22:05.989265', 'step': 4561, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:06.043036', 'step': 4561, 'epoch': 1} {'type': 'loss', 'content': 0.1521347612142563, 'timestamp': '2025-10-01 04:22:06.045449', 'step': 4562, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:06.102679', 'step': 4562, 'epoch': 1} {'type': 'loss', 'content': 0.13135747611522675, 'timestamp': '2025-10-01 04:22:06.104811', 'step': 4563, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:06.158994', 'step': 4563, 'epoch': 1} {'type': 'loss', 'content': 0.1691083014011383, 'timestamp': '2025-10-01 04:22:06.164868', 'step': 4564, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:06.225322', 'step': 4564, 'epoch': 1} {'type': 'loss', 'content': 0.13924509286880493, 'timestamp': '2025-10-01 04:22:06.227782', 'step': 4565, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:06.281437', 'step': 4565, 'epoch': 1} {'type': 'loss', 'content': 0.10174615681171417, 'timestamp': '2025-10-01 04:22:06.283442', 'step': 4566, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:06.337198', 'step': 4566, 'epoch': 1} {'type': 'loss', 'content': 0.2102048248052597, 'timestamp': '2025-10-01 04:22:06.339389', 'step': 4567, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:06.392810', 'step': 4567, 'epoch': 1} {'type': 'loss', 'content': 0.13810493052005768, 'timestamp': '2025-10-01 04:22:06.398497', 'step': 4568, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:06.450600', 'step': 4568, 'epoch': 1} {'type': 'loss', 'content': 0.19861984252929688, 'timestamp': '2025-10-01 04:22:06.452618', 'step': 4569, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:06.505675', 'step': 4569, 'epoch': 1} {'type': 'loss', 'content': 0.2266457974910736, 'timestamp': '2025-10-01 04:22:06.507811', 'step': 4570, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:06.561447', 'step': 4570, 'epoch': 1} {'type': 'loss', 'content': 0.16119980812072754, 'timestamp': '2025-10-01 04:22:06.563600', 'step': 4571, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:06.617369', 'step': 4571, 'epoch': 1} {'type': 'loss', 'content': 0.1735755354166031, 'timestamp': '2025-10-01 04:22:06.622931', 'step': 4572, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:06.680112', 'step': 4572, 'epoch': 1} {'type': 'loss', 'content': 0.19068340957164764, 'timestamp': '2025-10-01 04:22:06.684367', 'step': 4573, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:06.738268', 'step': 4573, 'epoch': 1} {'type': 'loss', 'content': 0.10782567411661148, 'timestamp': '2025-10-01 04:22:06.740568', 'step': 4574, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:06.794187', 'step': 4574, 'epoch': 1} {'type': 'loss', 'content': 0.17131327092647552, 'timestamp': '2025-10-01 04:22:06.796225', 'step': 4575, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:06.850250', 'step': 4575, 'epoch': 1} {'type': 'loss', 'content': 0.18224288523197174, 'timestamp': '2025-10-01 04:22:06.855869', 'step': 4576, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:22:06.908331', 'step': 4576, 'epoch': 1} {'type': 'loss', 'content': 0.17383792996406555, 'timestamp': '2025-10-01 04:22:06.910547', 'step': 4577, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:06.963280', 'step': 4577, 'epoch': 1} {'type': 'loss', 'content': 0.20463721454143524, 'timestamp': '2025-10-01 04:22:06.965383', 'step': 4578, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:07.018655', 'step': 4578, 'epoch': 1} {'type': 'loss', 'content': 0.09316590428352356, 'timestamp': '2025-10-01 04:22:07.020606', 'step': 4579, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:07.073348', 'step': 4579, 'epoch': 1} {'type': 'loss', 'content': 0.13578079640865326, 'timestamp': '2025-10-01 04:22:07.078926', 'step': 4580, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:07.131939', 'step': 4580, 'epoch': 1} {'type': 'loss', 'content': 0.15373599529266357, 'timestamp': '2025-10-01 04:22:07.133997', 'step': 4581, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:07.187107', 'step': 4581, 'epoch': 1} {'type': 'loss', 'content': 0.15845419466495514, 'timestamp': '2025-10-01 04:22:07.189195', 'step': 4582, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:07.242194', 'step': 4582, 'epoch': 1} {'type': 'loss', 'content': 0.2555425465106964, 'timestamp': '2025-10-01 04:22:07.244271', 'step': 4583, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:07.296995', 'step': 4583, 'epoch': 1} {'type': 'loss', 'content': 0.10461577028036118, 'timestamp': '2025-10-01 04:22:07.302785', 'step': 4584, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:07.355981', 'step': 4584, 'epoch': 1} {'type': 'loss', 'content': 0.1876201629638672, 'timestamp': '2025-10-01 04:22:07.358028', 'step': 4585, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:07.410918', 'step': 4585, 'epoch': 1} {'type': 'loss', 'content': 0.1412316858768463, 'timestamp': '2025-10-01 04:22:07.412900', 'step': 4586, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:07.466077', 'step': 4586, 'epoch': 1} {'type': 'loss', 'content': 0.10423125326633453, 'timestamp': '2025-10-01 04:22:07.468352', 'step': 4587, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:07.521472', 'step': 4587, 'epoch': 1} {'type': 'loss', 'content': 0.20077615976333618, 'timestamp': '2025-10-01 04:22:07.527367', 'step': 4588, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:07.580477', 'step': 4588, 'epoch': 1} {'type': 'loss', 'content': 0.11425571888685226, 'timestamp': '2025-10-01 04:22:07.582587', 'step': 4589, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:07.635781', 'step': 4589, 'epoch': 1} {'type': 'loss', 'content': 0.11499723792076111, 'timestamp': '2025-10-01 04:22:07.637780', 'step': 4590, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:07.691003', 'step': 4590, 'epoch': 1} {'type': 'loss', 'content': 0.14028289914131165, 'timestamp': '2025-10-01 04:22:07.692965', 'step': 4591, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:22:07.746313', 'step': 4591, 'epoch': 1} {'type': 'loss', 'content': 0.2056608945131302, 'timestamp': '2025-10-01 04:22:07.752098', 'step': 4592, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:07.804812', 'step': 4592, 'epoch': 1} {'type': 'loss', 'content': 0.1709507256746292, 'timestamp': '2025-10-01 04:22:07.806749', 'step': 4593, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:07.859217', 'step': 4593, 'epoch': 1} {'type': 'loss', 'content': 0.23274166882038116, 'timestamp': '2025-10-01 04:22:07.861423', 'step': 4594, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:22:07.914645', 'step': 4594, 'epoch': 1} {'type': 'loss', 'content': 0.16175837814807892, 'timestamp': '2025-10-01 04:22:07.917016', 'step': 4595, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:07.969434', 'step': 4595, 'epoch': 1} {'type': 'loss', 'content': 0.11682287603616714, 'timestamp': '2025-10-01 04:22:07.974908', 'step': 4596, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:08.027875', 'step': 4596, 'epoch': 1} {'type': 'loss', 'content': 0.18820194900035858, 'timestamp': '2025-10-01 04:22:08.034593', 'step': 4597, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:08.087928', 'step': 4597, 'epoch': 1} {'type': 'loss', 'content': 0.08520247787237167, 'timestamp': '2025-10-01 04:22:08.090079', 'step': 4598, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:08.143692', 'step': 4598, 'epoch': 1} {'type': 'loss', 'content': 0.13628000020980835, 'timestamp': '2025-10-01 04:22:08.145639', 'step': 4599, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:08.197698', 'step': 4599, 'epoch': 1} {'type': 'loss', 'content': 0.18033093214035034, 'timestamp': '2025-10-01 04:22:08.203310', 'step': 4600, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:08.255536', 'step': 4600, 'epoch': 1} {'type': 'loss', 'content': 0.17841699719429016, 'timestamp': '2025-10-01 04:22:08.258875', 'step': 4601, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:08.311999', 'step': 4601, 'epoch': 1} {'type': 'loss', 'content': 0.13189871609210968, 'timestamp': '2025-10-01 04:22:08.316906', 'step': 4602, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:08.370453', 'step': 4602, 'epoch': 1} {'type': 'loss', 'content': 0.20108100771903992, 'timestamp': '2025-10-01 04:22:08.386688', 'step': 4603, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:08.440031', 'step': 4603, 'epoch': 1} {'type': 'loss', 'content': 0.1310497522354126, 'timestamp': '2025-10-01 04:22:08.445643', 'step': 4604, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:08.498166', 'step': 4604, 'epoch': 1} {'type': 'loss', 'content': 0.14031384885311127, 'timestamp': '2025-10-01 04:22:08.500069', 'step': 4605, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:08.555124', 'step': 4605, 'epoch': 1} {'type': 'loss', 'content': 0.15200425684452057, 'timestamp': '2025-10-01 04:22:08.557126', 'step': 4606, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:08.610206', 'step': 4606, 'epoch': 1} {'type': 'loss', 'content': 0.12149322032928467, 'timestamp': '2025-10-01 04:22:08.612816', 'step': 4607, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:08.666201', 'step': 4607, 'epoch': 1} {'type': 'loss', 'content': 0.14681051671504974, 'timestamp': '2025-10-01 04:22:08.672039', 'step': 4608, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:08.724792', 'step': 4608, 'epoch': 1} {'type': 'loss', 'content': 0.13977232575416565, 'timestamp': '2025-10-01 04:22:08.726726', 'step': 4609, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:08.781567', 'step': 4609, 'epoch': 1} {'type': 'loss', 'content': 0.16055512428283691, 'timestamp': '2025-10-01 04:22:08.785515', 'step': 4610, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:08.838844', 'step': 4610, 'epoch': 1} {'type': 'loss', 'content': 0.16085633635520935, 'timestamp': '2025-10-01 04:22:08.841029', 'step': 4611, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:08.895359', 'step': 4611, 'epoch': 1} {'type': 'loss', 'content': 0.09912235289812088, 'timestamp': '2025-10-01 04:22:08.900994', 'step': 4612, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:08.953714', 'step': 4612, 'epoch': 1} {'type': 'loss', 'content': 0.29136934876441956, 'timestamp': '2025-10-01 04:22:08.955589', 'step': 4613, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:09.009154', 'step': 4613, 'epoch': 1} {'type': 'loss', 'content': 0.15160176157951355, 'timestamp': '2025-10-01 04:22:09.011154', 'step': 4614, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:09.071501', 'step': 4614, 'epoch': 1} {'type': 'loss', 'content': 0.13034674525260925, 'timestamp': '2025-10-01 04:22:09.073729', 'step': 4615, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:09.127417', 'step': 4615, 'epoch': 1} {'type': 'loss', 'content': 0.17480814456939697, 'timestamp': '2025-10-01 04:22:09.133239', 'step': 4616, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:09.197488', 'step': 4616, 'epoch': 1} {'type': 'loss', 'content': 0.12118638306856155, 'timestamp': '2025-10-01 04:22:09.208784', 'step': 4617, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:22:09.266090', 'step': 4617, 'epoch': 1} {'type': 'loss', 'content': 0.10711333155632019, 'timestamp': '2025-10-01 04:22:09.268175', 'step': 4618, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:09.324901', 'step': 4618, 'epoch': 1} {'type': 'loss', 'content': 0.1604214757680893, 'timestamp': '2025-10-01 04:22:09.326935', 'step': 4619, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:09.384777', 'step': 4619, 'epoch': 1} {'type': 'loss', 'content': 0.22929255664348602, 'timestamp': '2025-10-01 04:22:09.390309', 'step': 4620, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:09.443091', 'step': 4620, 'epoch': 1} {'type': 'loss', 'content': 0.14701463282108307, 'timestamp': '2025-10-01 04:22:09.445327', 'step': 4621, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:22:09.498574', 'step': 4621, 'epoch': 1} {'type': 'loss', 'content': 0.13536788523197174, 'timestamp': '2025-10-01 04:22:09.500734', 'step': 4622, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:09.555800', 'step': 4622, 'epoch': 1} {'type': 'loss', 'content': 0.13871608674526215, 'timestamp': '2025-10-01 04:22:09.558605', 'step': 4623, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:09.611965', 'step': 4623, 'epoch': 1} {'type': 'loss', 'content': 0.16243207454681396, 'timestamp': '2025-10-01 04:22:09.617719', 'step': 4624, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:09.672047', 'step': 4624, 'epoch': 1} {'type': 'loss', 'content': 0.4235970377922058, 'timestamp': '2025-10-01 04:22:09.678167', 'step': 4625, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:09.740998', 'step': 4625, 'epoch': 1} {'type': 'loss', 'content': 0.2122219204902649, 'timestamp': '2025-10-01 04:22:09.743221', 'step': 4626, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:22:09.797906', 'step': 4626, 'epoch': 1} {'type': 'loss', 'content': 0.2025953084230423, 'timestamp': '2025-10-01 04:22:09.799944', 'step': 4627, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:09.862152', 'step': 4627, 'epoch': 1} {'type': 'loss', 'content': 0.09025438874959946, 'timestamp': '2025-10-01 04:22:09.867654', 'step': 4628, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:09.920623', 'step': 4628, 'epoch': 1} {'type': 'loss', 'content': 0.20976082980632782, 'timestamp': '2025-10-01 04:22:09.923163', 'step': 4629, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:09.976788', 'step': 4629, 'epoch': 1} {'type': 'loss', 'content': 0.17888985574245453, 'timestamp': '2025-10-01 04:22:09.978806', 'step': 4630, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:10.031838', 'step': 4630, 'epoch': 1} {'type': 'loss', 'content': 0.11648569256067276, 'timestamp': '2025-10-01 04:22:10.034240', 'step': 4631, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:10.092245', 'step': 4631, 'epoch': 1} {'type': 'loss', 'content': 0.17349746823310852, 'timestamp': '2025-10-01 04:22:10.097968', 'step': 4632, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:10.150915', 'step': 4632, 'epoch': 1} {'type': 'loss', 'content': 0.18624745309352875, 'timestamp': '2025-10-01 04:22:10.152974', 'step': 4633, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:10.206406', 'step': 4633, 'epoch': 1} {'type': 'loss', 'content': 0.14288224279880524, 'timestamp': '2025-10-01 04:22:10.208276', 'step': 4634, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:10.261597', 'step': 4634, 'epoch': 1} {'type': 'loss', 'content': 0.16034652292728424, 'timestamp': '2025-10-01 04:22:10.263879', 'step': 4635, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:10.317178', 'step': 4635, 'epoch': 1} {'type': 'loss', 'content': 0.14225566387176514, 'timestamp': '2025-10-01 04:22:10.322762', 'step': 4636, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:10.375103', 'step': 4636, 'epoch': 1} {'type': 'loss', 'content': 0.2293337881565094, 'timestamp': '2025-10-01 04:22:10.377179', 'step': 4637, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:10.430327', 'step': 4637, 'epoch': 1} {'type': 'loss', 'content': 0.18432499468326569, 'timestamp': '2025-10-01 04:22:10.432805', 'step': 4638, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:10.486788', 'step': 4638, 'epoch': 1} {'type': 'loss', 'content': 0.2815791368484497, 'timestamp': '2025-10-01 04:22:10.488923', 'step': 4639, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:10.543185', 'step': 4639, 'epoch': 1} {'type': 'loss', 'content': 0.23172874748706818, 'timestamp': '2025-10-01 04:22:10.549466', 'step': 4640, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:10.604413', 'step': 4640, 'epoch': 1} {'type': 'loss', 'content': 0.1571541428565979, 'timestamp': '2025-10-01 04:22:10.606671', 'step': 4641, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:22:10.660323', 'step': 4641, 'epoch': 1} {'type': 'loss', 'content': 0.16979357600212097, 'timestamp': '2025-10-01 04:22:10.662489', 'step': 4642, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:10.723813', 'step': 4642, 'epoch': 1} {'type': 'loss', 'content': 0.08854007720947266, 'timestamp': '2025-10-01 04:22:10.725873', 'step': 4643, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:10.780492', 'step': 4643, 'epoch': 1} {'type': 'loss', 'content': 0.21465125679969788, 'timestamp': '2025-10-01 04:22:10.786241', 'step': 4644, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:10.839453', 'step': 4644, 'epoch': 1} {'type': 'loss', 'content': 0.1677766591310501, 'timestamp': '2025-10-01 04:22:10.841795', 'step': 4645, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:10.895046', 'step': 4645, 'epoch': 1} {'type': 'loss', 'content': 0.18493478000164032, 'timestamp': '2025-10-01 04:22:10.897342', 'step': 4646, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:10.958184', 'step': 4646, 'epoch': 1} {'type': 'loss', 'content': 0.17309902608394623, 'timestamp': '2025-10-01 04:22:10.960507', 'step': 4647, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:11.014840', 'step': 4647, 'epoch': 1} {'type': 'loss', 'content': 0.13635528087615967, 'timestamp': '2025-10-01 04:22:11.020296', 'step': 4648, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:11.075553', 'step': 4648, 'epoch': 1} {'type': 'loss', 'content': 0.22190220654010773, 'timestamp': '2025-10-01 04:22:11.077703', 'step': 4649, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:11.131112', 'step': 4649, 'epoch': 1} {'type': 'loss', 'content': 0.17899812757968903, 'timestamp': '2025-10-01 04:22:11.133110', 'step': 4650, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:11.186738', 'step': 4650, 'epoch': 1} {'type': 'loss', 'content': 0.27578163146972656, 'timestamp': '2025-10-01 04:22:11.188989', 'step': 4651, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:11.242259', 'step': 4651, 'epoch': 1} {'type': 'loss', 'content': 0.10891730338335037, 'timestamp': '2025-10-01 04:22:11.248790', 'step': 4652, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:11.301287', 'step': 4652, 'epoch': 1} {'type': 'loss', 'content': 0.18238221108913422, 'timestamp': '2025-10-01 04:22:11.303256', 'step': 4653, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:11.355312', 'step': 4653, 'epoch': 1} {'type': 'loss', 'content': 0.2550112307071686, 'timestamp': '2025-10-01 04:22:11.357470', 'step': 4654, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:11.411088', 'step': 4654, 'epoch': 1} {'type': 'loss', 'content': 0.16901956498622894, 'timestamp': '2025-10-01 04:22:11.413290', 'step': 4655, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:11.465991', 'step': 4655, 'epoch': 1} {'type': 'loss', 'content': 0.1360781192779541, 'timestamp': '2025-10-01 04:22:11.471955', 'step': 4656, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:11.524666', 'step': 4656, 'epoch': 1} {'type': 'loss', 'content': 0.20841971039772034, 'timestamp': '2025-10-01 04:22:11.526895', 'step': 4657, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:11.579985', 'step': 4657, 'epoch': 1} {'type': 'loss', 'content': 0.20082387328147888, 'timestamp': '2025-10-01 04:22:11.582104', 'step': 4658, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:11.634961', 'step': 4658, 'epoch': 1} {'type': 'loss', 'content': 0.14242273569107056, 'timestamp': '2025-10-01 04:22:11.636970', 'step': 4659, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:11.689666', 'step': 4659, 'epoch': 1} {'type': 'loss', 'content': 0.24821124970912933, 'timestamp': '2025-10-01 04:22:11.695570', 'step': 4660, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-01 04:22:25.469960', 'step': 4660, 'epoch': 1} {'type': 'pplx', 'content': 11083.390852692135, 'timestamp': '2025-10-01 04:22:25.473003', 'step': 4660, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:25.525506', 'step': 4660, 'epoch': 1} {'type': 'loss', 'content': 0.13508151471614838, 'timestamp': '2025-10-01 04:22:25.527535', 'step': 4661, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:25.581366', 'step': 4661, 'epoch': 1} {'type': 'loss', 'content': 0.18165522813796997, 'timestamp': '2025-10-01 04:22:25.583454', 'step': 4662, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:25.636246', 'step': 4662, 'epoch': 1} {'type': 'loss', 'content': 0.17063547670841217, 'timestamp': '2025-10-01 04:22:25.638469', 'step': 4663, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:25.691814', 'step': 4663, 'epoch': 1} {'type': 'loss', 'content': 0.146120086312294, 'timestamp': '2025-10-01 04:22:25.697689', 'step': 4664, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:25.753972', 'step': 4664, 'epoch': 1} {'type': 'loss', 'content': 0.09660866856575012, 'timestamp': '2025-10-01 04:22:25.756103', 'step': 4665, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:25.810552', 'step': 4665, 'epoch': 1} {'type': 'loss', 'content': 0.23134639859199524, 'timestamp': '2025-10-01 04:22:25.812690', 'step': 4666, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:25.868520', 'step': 4666, 'epoch': 1} {'type': 'loss', 'content': 0.15003342926502228, 'timestamp': '2025-10-01 04:22:25.870572', 'step': 4667, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:25.925134', 'step': 4667, 'epoch': 1} {'type': 'loss', 'content': 0.24810032546520233, 'timestamp': '2025-10-01 04:22:25.931047', 'step': 4668, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:25.983474', 'step': 4668, 'epoch': 1} {'type': 'loss', 'content': 0.1521030068397522, 'timestamp': '2025-10-01 04:22:25.988104', 'step': 4669, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:26.043362', 'step': 4669, 'epoch': 1} {'type': 'loss', 'content': 0.21177418529987335, 'timestamp': '2025-10-01 04:22:26.045568', 'step': 4670, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:26.100074', 'step': 4670, 'epoch': 1} {'type': 'loss', 'content': 0.1908797025680542, 'timestamp': '2025-10-01 04:22:26.102078', 'step': 4671, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:26.154983', 'step': 4671, 'epoch': 1} {'type': 'loss', 'content': 0.19498082995414734, 'timestamp': '2025-10-01 04:22:26.160630', 'step': 4672, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:26.213212', 'step': 4672, 'epoch': 1} {'type': 'loss', 'content': 0.15679831802845, 'timestamp': '2025-10-01 04:22:26.215380', 'step': 4673, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:26.268103', 'step': 4673, 'epoch': 1} {'type': 'loss', 'content': 0.1671522557735443, 'timestamp': '2025-10-01 04:22:26.270312', 'step': 4674, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:26.323452', 'step': 4674, 'epoch': 1} {'type': 'loss', 'content': 0.18412837386131287, 'timestamp': '2025-10-01 04:22:26.325539', 'step': 4675, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:26.389532', 'step': 4675, 'epoch': 1} {'type': 'loss', 'content': 0.2000523954629898, 'timestamp': '2025-10-01 04:22:26.395210', 'step': 4676, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:26.447521', 'step': 4676, 'epoch': 1} {'type': 'loss', 'content': 0.24402466416358948, 'timestamp': '2025-10-01 04:22:26.449757', 'step': 4677, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:26.503178', 'step': 4677, 'epoch': 1} {'type': 'loss', 'content': 0.1417134553194046, 'timestamp': '2025-10-01 04:22:26.505325', 'step': 4678, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:26.559681', 'step': 4678, 'epoch': 1} {'type': 'loss', 'content': 0.19029641151428223, 'timestamp': '2025-10-01 04:22:26.561629', 'step': 4679, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:26.614776', 'step': 4679, 'epoch': 1} {'type': 'loss', 'content': 0.18679916858673096, 'timestamp': '2025-10-01 04:22:26.620508', 'step': 4680, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:26.673083', 'step': 4680, 'epoch': 1} {'type': 'loss', 'content': 0.1262187957763672, 'timestamp': '2025-10-01 04:22:26.684064', 'step': 4681, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:26.737318', 'step': 4681, 'epoch': 1} {'type': 'loss', 'content': 0.18738850951194763, 'timestamp': '2025-10-01 04:22:26.739410', 'step': 4682, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:26.792688', 'step': 4682, 'epoch': 1} {'type': 'loss', 'content': 0.11833260208368301, 'timestamp': '2025-10-01 04:22:26.798428', 'step': 4683, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:26.852233', 'step': 4683, 'epoch': 1} {'type': 'loss', 'content': 0.24266161024570465, 'timestamp': '2025-10-01 04:22:26.858065', 'step': 4684, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:26.910889', 'step': 4684, 'epoch': 1} {'type': 'loss', 'content': 0.16468486189842224, 'timestamp': '2025-10-01 04:22:26.912991', 'step': 4685, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:26.972916', 'step': 4685, 'epoch': 1} {'type': 'loss', 'content': 0.18490847945213318, 'timestamp': '2025-10-01 04:22:26.974926', 'step': 4686, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:27.040393', 'step': 4686, 'epoch': 1} {'type': 'loss', 'content': 0.15444141626358032, 'timestamp': '2025-10-01 04:22:27.043213', 'step': 4687, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:27.099543', 'step': 4687, 'epoch': 1} {'type': 'loss', 'content': 0.11329218745231628, 'timestamp': '2025-10-01 04:22:27.105196', 'step': 4688, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:27.158163', 'step': 4688, 'epoch': 1} {'type': 'loss', 'content': 0.06308240443468094, 'timestamp': '2025-10-01 04:22:27.161059', 'step': 4689, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:27.214120', 'step': 4689, 'epoch': 1} {'type': 'loss', 'content': 0.11083421111106873, 'timestamp': '2025-10-01 04:22:27.216282', 'step': 4690, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:27.269404', 'step': 4690, 'epoch': 1} {'type': 'loss', 'content': 0.20912030339241028, 'timestamp': '2025-10-01 04:22:27.271503', 'step': 4691, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:27.325555', 'step': 4691, 'epoch': 1} {'type': 'loss', 'content': 0.11082688719034195, 'timestamp': '2025-10-01 04:22:27.331237', 'step': 4692, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:27.384136', 'step': 4692, 'epoch': 1} {'type': 'loss', 'content': 0.11413022130727768, 'timestamp': '2025-10-01 04:22:27.386788', 'step': 4693, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:27.439666', 'step': 4693, 'epoch': 1} {'type': 'loss', 'content': 0.18149921298027039, 'timestamp': '2025-10-01 04:22:27.443368', 'step': 4694, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:27.498338', 'step': 4694, 'epoch': 1} {'type': 'loss', 'content': 0.17482514679431915, 'timestamp': '2025-10-01 04:22:27.500496', 'step': 4695, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:22:27.554039', 'step': 4695, 'epoch': 1} {'type': 'loss', 'content': 0.14285385608673096, 'timestamp': '2025-10-01 04:22:27.560236', 'step': 4696, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:27.612526', 'step': 4696, 'epoch': 1} {'type': 'loss', 'content': 0.10332393646240234, 'timestamp': '2025-10-01 04:22:27.615429', 'step': 4697, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:22:27.673370', 'step': 4697, 'epoch': 1} {'type': 'loss', 'content': 0.22142036259174347, 'timestamp': '2025-10-01 04:22:27.677824', 'step': 4698, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:27.731923', 'step': 4698, 'epoch': 1} {'type': 'loss', 'content': 0.14347347617149353, 'timestamp': '2025-10-01 04:22:27.734225', 'step': 4699, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:27.787837', 'step': 4699, 'epoch': 1} {'type': 'loss', 'content': 0.24118566513061523, 'timestamp': '2025-10-01 04:22:27.793532', 'step': 4700, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:27.846017', 'step': 4700, 'epoch': 1} {'type': 'loss', 'content': 0.16418461501598358, 'timestamp': '2025-10-01 04:22:27.848236', 'step': 4701, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:27.901326', 'step': 4701, 'epoch': 1} {'type': 'loss', 'content': 0.11996684968471527, 'timestamp': '2025-10-01 04:22:27.903884', 'step': 4702, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:27.957820', 'step': 4702, 'epoch': 1} {'type': 'loss', 'content': 0.12385475635528564, 'timestamp': '2025-10-01 04:22:27.960659', 'step': 4703, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:28.013178', 'step': 4703, 'epoch': 1} {'type': 'loss', 'content': 0.1856888085603714, 'timestamp': '2025-10-01 04:22:28.018866', 'step': 4704, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:28.072231', 'step': 4704, 'epoch': 1} {'type': 'loss', 'content': 0.0969180166721344, 'timestamp': '2025-10-01 04:22:28.074520', 'step': 4705, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:28.133291', 'step': 4705, 'epoch': 1} {'type': 'loss', 'content': 0.12087942659854889, 'timestamp': '2025-10-01 04:22:28.137895', 'step': 4706, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:28.191473', 'step': 4706, 'epoch': 1} {'type': 'loss', 'content': 0.16595733165740967, 'timestamp': '2025-10-01 04:22:28.193562', 'step': 4707, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:28.254657', 'step': 4707, 'epoch': 1} {'type': 'loss', 'content': 0.14801962673664093, 'timestamp': '2025-10-01 04:22:28.260169', 'step': 4708, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:22:28.312685', 'step': 4708, 'epoch': 1} {'type': 'loss', 'content': 0.13049614429473877, 'timestamp': '2025-10-01 04:22:28.314772', 'step': 4709, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:28.367817', 'step': 4709, 'epoch': 1} {'type': 'loss', 'content': 0.15273980796337128, 'timestamp': '2025-10-01 04:22:28.370981', 'step': 4710, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:28.423801', 'step': 4710, 'epoch': 1} {'type': 'loss', 'content': 0.16912461817264557, 'timestamp': '2025-10-01 04:22:28.425984', 'step': 4711, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:28.478963', 'step': 4711, 'epoch': 1} {'type': 'loss', 'content': 0.15411022305488586, 'timestamp': '2025-10-01 04:22:28.484526', 'step': 4712, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:28.538498', 'step': 4712, 'epoch': 1} {'type': 'loss', 'content': 0.1641991287469864, 'timestamp': '2025-10-01 04:22:28.541441', 'step': 4713, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:28.595016', 'step': 4713, 'epoch': 1} {'type': 'loss', 'content': 0.3840811550617218, 'timestamp': '2025-10-01 04:22:28.597517', 'step': 4714, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:28.651145', 'step': 4714, 'epoch': 1} {'type': 'loss', 'content': 0.23791702091693878, 'timestamp': '2025-10-01 04:22:28.654115', 'step': 4715, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:28.708046', 'step': 4715, 'epoch': 1} {'type': 'loss', 'content': 0.17669105529785156, 'timestamp': '2025-10-01 04:22:28.713728', 'step': 4716, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:28.766836', 'step': 4716, 'epoch': 1} {'type': 'loss', 'content': 0.18424703180789948, 'timestamp': '2025-10-01 04:22:28.768851', 'step': 4717, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:28.821649', 'step': 4717, 'epoch': 1} {'type': 'loss', 'content': 0.13271060585975647, 'timestamp': '2025-10-01 04:22:28.823769', 'step': 4718, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:28.877435', 'step': 4718, 'epoch': 1} {'type': 'loss', 'content': 0.17438454926013947, 'timestamp': '2025-10-01 04:22:28.879480', 'step': 4719, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:28.932703', 'step': 4719, 'epoch': 1} {'type': 'loss', 'content': 0.07685433328151703, 'timestamp': '2025-10-01 04:22:28.938539', 'step': 4720, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:28.991006', 'step': 4720, 'epoch': 1} {'type': 'loss', 'content': 0.2184232622385025, 'timestamp': '2025-10-01 04:22:28.992916', 'step': 4721, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:29.047081', 'step': 4721, 'epoch': 1} {'type': 'loss', 'content': 0.12243609875440598, 'timestamp': '2025-10-01 04:22:29.049199', 'step': 4722, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:29.102560', 'step': 4722, 'epoch': 1} {'type': 'loss', 'content': 0.18503226339817047, 'timestamp': '2025-10-01 04:22:29.104718', 'step': 4723, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:29.166367', 'step': 4723, 'epoch': 1} {'type': 'loss', 'content': 0.1330534666776657, 'timestamp': '2025-10-01 04:22:29.171958', 'step': 4724, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:29.224676', 'step': 4724, 'epoch': 1} {'type': 'loss', 'content': 0.12277938425540924, 'timestamp': '2025-10-01 04:22:29.226680', 'step': 4725, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:29.280169', 'step': 4725, 'epoch': 1} {'type': 'loss', 'content': 0.284402072429657, 'timestamp': '2025-10-01 04:22:29.282203', 'step': 4726, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:29.336314', 'step': 4726, 'epoch': 1} {'type': 'loss', 'content': 0.22000154852867126, 'timestamp': '2025-10-01 04:22:29.338790', 'step': 4727, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:22:29.393021', 'step': 4727, 'epoch': 1} {'type': 'loss', 'content': 0.24130408465862274, 'timestamp': '2025-10-01 04:22:29.398761', 'step': 4728, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:29.463359', 'step': 4728, 'epoch': 1} {'type': 'loss', 'content': 0.07135042548179626, 'timestamp': '2025-10-01 04:22:29.465295', 'step': 4729, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:29.517890', 'step': 4729, 'epoch': 1} {'type': 'loss', 'content': 0.13303148746490479, 'timestamp': '2025-10-01 04:22:29.522252', 'step': 4730, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:29.597649', 'step': 4730, 'epoch': 1} {'type': 'loss', 'content': 0.19114825129508972, 'timestamp': '2025-10-01 04:22:29.599860', 'step': 4731, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:29.653282', 'step': 4731, 'epoch': 1} {'type': 'loss', 'content': 0.18193462491035461, 'timestamp': '2025-10-01 04:22:29.659247', 'step': 4732, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:29.715942', 'step': 4732, 'epoch': 1} {'type': 'loss', 'content': 0.15897229313850403, 'timestamp': '2025-10-01 04:22:29.717813', 'step': 4733, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:29.772340', 'step': 4733, 'epoch': 1} {'type': 'loss', 'content': 0.17424681782722473, 'timestamp': '2025-10-01 04:22:29.774436', 'step': 4734, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:22:29.827405', 'step': 4734, 'epoch': 1} {'type': 'loss', 'content': 0.2425144463777542, 'timestamp': '2025-10-01 04:22:29.829539', 'step': 4735, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:29.884013', 'step': 4735, 'epoch': 1} {'type': 'loss', 'content': 0.15044698119163513, 'timestamp': '2025-10-01 04:22:29.889693', 'step': 4736, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:29.952713', 'step': 4736, 'epoch': 1} {'type': 'loss', 'content': 0.13465003669261932, 'timestamp': '2025-10-01 04:22:29.954907', 'step': 4737, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:30.012348', 'step': 4737, 'epoch': 1} {'type': 'loss', 'content': 0.17821049690246582, 'timestamp': '2025-10-01 04:22:30.014898', 'step': 4738, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:30.068434', 'step': 4738, 'epoch': 1} {'type': 'loss', 'content': 0.1788184642791748, 'timestamp': '2025-10-01 04:22:30.070476', 'step': 4739, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:30.123494', 'step': 4739, 'epoch': 1} {'type': 'loss', 'content': 0.12346001714468002, 'timestamp': '2025-10-01 04:22:30.130622', 'step': 4740, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:30.183262', 'step': 4740, 'epoch': 1} {'type': 'loss', 'content': 0.23248662054538727, 'timestamp': '2025-10-01 04:22:30.185552', 'step': 4741, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:30.240095', 'step': 4741, 'epoch': 1} {'type': 'loss', 'content': 0.1083725243806839, 'timestamp': '2025-10-01 04:22:30.244420', 'step': 4742, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:30.298274', 'step': 4742, 'epoch': 1} {'type': 'loss', 'content': 0.18347123265266418, 'timestamp': '2025-10-01 04:22:30.303458', 'step': 4743, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:22:30.359786', 'step': 4743, 'epoch': 1} {'type': 'loss', 'content': 0.10887576639652252, 'timestamp': '2025-10-01 04:22:30.366626', 'step': 4744, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:22:30.422029', 'step': 4744, 'epoch': 1} {'type': 'loss', 'content': 0.08542588353157043, 'timestamp': '2025-10-01 04:22:30.428976', 'step': 4745, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:30.484258', 'step': 4745, 'epoch': 1} {'type': 'loss', 'content': 0.26195967197418213, 'timestamp': '2025-10-01 04:22:30.487350', 'step': 4746, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:30.544584', 'step': 4746, 'epoch': 1} {'type': 'loss', 'content': 0.2840568721294403, 'timestamp': '2025-10-01 04:22:30.546926', 'step': 4747, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:30.604009', 'step': 4747, 'epoch': 1} {'type': 'loss', 'content': 0.10450877249240875, 'timestamp': '2025-10-01 04:22:30.612436', 'step': 4748, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:30.671303', 'step': 4748, 'epoch': 1} {'type': 'loss', 'content': 0.20160159468650818, 'timestamp': '2025-10-01 04:22:30.673896', 'step': 4749, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:30.730381', 'step': 4749, 'epoch': 1} {'type': 'loss', 'content': 0.1320575326681137, 'timestamp': '2025-10-01 04:22:30.732747', 'step': 4750, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:30.795084', 'step': 4750, 'epoch': 1} {'type': 'loss', 'content': 0.22382280230522156, 'timestamp': '2025-10-01 04:22:30.802575', 'step': 4751, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:22:30.857131', 'step': 4751, 'epoch': 1} {'type': 'loss', 'content': 0.185856893658638, 'timestamp': '2025-10-01 04:22:30.863370', 'step': 4752, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:30.917172', 'step': 4752, 'epoch': 1} {'type': 'loss', 'content': 0.25054237246513367, 'timestamp': '2025-10-01 04:22:30.919417', 'step': 4753, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:30.974791', 'step': 4753, 'epoch': 1} {'type': 'loss', 'content': 0.181549534201622, 'timestamp': '2025-10-01 04:22:30.977113', 'step': 4754, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:31.030851', 'step': 4754, 'epoch': 1} {'type': 'loss', 'content': 0.17449510097503662, 'timestamp': '2025-10-01 04:22:31.034488', 'step': 4755, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:31.089285', 'step': 4755, 'epoch': 1} {'type': 'loss', 'content': 0.18801182508468628, 'timestamp': '2025-10-01 04:22:31.095358', 'step': 4756, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:31.151837', 'step': 4756, 'epoch': 1} {'type': 'loss', 'content': 0.14848682284355164, 'timestamp': '2025-10-01 04:22:31.154349', 'step': 4757, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:31.208792', 'step': 4757, 'epoch': 1} {'type': 'loss', 'content': 0.12034601718187332, 'timestamp': '2025-10-01 04:22:31.213298', 'step': 4758, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:22:31.271899', 'step': 4758, 'epoch': 1} {'type': 'loss', 'content': 0.178598552942276, 'timestamp': '2025-10-01 04:22:31.274405', 'step': 4759, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:31.328592', 'step': 4759, 'epoch': 1} {'type': 'loss', 'content': 0.1721080243587494, 'timestamp': '2025-10-01 04:22:31.335071', 'step': 4760, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:31.388284', 'step': 4760, 'epoch': 1} {'type': 'loss', 'content': 0.211409792304039, 'timestamp': '2025-10-01 04:22:31.390534', 'step': 4761, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:31.452320', 'step': 4761, 'epoch': 1} {'type': 'loss', 'content': 0.07894764840602875, 'timestamp': '2025-10-01 04:22:31.454602', 'step': 4762, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:31.518148', 'step': 4762, 'epoch': 1} {'type': 'loss', 'content': 0.05768085643649101, 'timestamp': '2025-10-01 04:22:31.525528', 'step': 4763, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:31.581754', 'step': 4763, 'epoch': 1} {'type': 'loss', 'content': 0.14340059459209442, 'timestamp': '2025-10-01 04:22:31.587733', 'step': 4764, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:31.641792', 'step': 4764, 'epoch': 1} {'type': 'loss', 'content': 0.14722254872322083, 'timestamp': '2025-10-01 04:22:31.644243', 'step': 4765, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:31.698455', 'step': 4765, 'epoch': 1} {'type': 'loss', 'content': 0.3108023405075073, 'timestamp': '2025-10-01 04:22:31.700487', 'step': 4766, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:31.754559', 'step': 4766, 'epoch': 1} {'type': 'loss', 'content': 0.2605176270008087, 'timestamp': '2025-10-01 04:22:31.756680', 'step': 4767, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:31.814779', 'step': 4767, 'epoch': 1} {'type': 'loss', 'content': 0.21678180992603302, 'timestamp': '2025-10-01 04:22:31.820756', 'step': 4768, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:31.873795', 'step': 4768, 'epoch': 1} {'type': 'loss', 'content': 0.14822158217430115, 'timestamp': '2025-10-01 04:22:31.875769', 'step': 4769, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:31.929250', 'step': 4769, 'epoch': 1} {'type': 'loss', 'content': 0.2153787612915039, 'timestamp': '2025-10-01 04:22:31.931473', 'step': 4770, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:31.984695', 'step': 4770, 'epoch': 1} {'type': 'loss', 'content': 0.12643858790397644, 'timestamp': '2025-10-01 04:22:31.986926', 'step': 4771, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:32.040525', 'step': 4771, 'epoch': 1} {'type': 'loss', 'content': 0.12584370374679565, 'timestamp': '2025-10-01 04:22:32.046935', 'step': 4772, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:32.102258', 'step': 4772, 'epoch': 1} {'type': 'loss', 'content': 0.1890440285205841, 'timestamp': '2025-10-01 04:22:32.104463', 'step': 4773, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:32.160233', 'step': 4773, 'epoch': 1} {'type': 'loss', 'content': 0.197828009724617, 'timestamp': '2025-10-01 04:22:32.162471', 'step': 4774, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:32.220579', 'step': 4774, 'epoch': 1} {'type': 'loss', 'content': 0.1990976482629776, 'timestamp': '2025-10-01 04:22:32.223785', 'step': 4775, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:32.281060', 'step': 4775, 'epoch': 1} {'type': 'loss', 'content': 0.08976174145936966, 'timestamp': '2025-10-01 04:22:32.287678', 'step': 4776, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:32.352455', 'step': 4776, 'epoch': 1} {'type': 'loss', 'content': 0.1309037059545517, 'timestamp': '2025-10-01 04:22:32.365907', 'step': 4777, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:32.441830', 'step': 4777, 'epoch': 1} {'type': 'loss', 'content': 0.1385018527507782, 'timestamp': '2025-10-01 04:22:32.444067', 'step': 4778, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:32.500290', 'step': 4778, 'epoch': 1} {'type': 'loss', 'content': 0.14500120282173157, 'timestamp': '2025-10-01 04:22:32.502287', 'step': 4779, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:32.557593', 'step': 4779, 'epoch': 1} {'type': 'loss', 'content': 0.1668422818183899, 'timestamp': '2025-10-01 04:22:32.564059', 'step': 4780, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:22:32.619269', 'step': 4780, 'epoch': 1} {'type': 'loss', 'content': 0.25997284054756165, 'timestamp': '2025-10-01 04:22:32.621297', 'step': 4781, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:32.675991', 'step': 4781, 'epoch': 1} {'type': 'loss', 'content': 0.2437303066253662, 'timestamp': '2025-10-01 04:22:32.678360', 'step': 4782, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:32.732681', 'step': 4782, 'epoch': 1} {'type': 'loss', 'content': 0.11373788118362427, 'timestamp': '2025-10-01 04:22:32.734870', 'step': 4783, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:32.788651', 'step': 4783, 'epoch': 1} {'type': 'loss', 'content': 0.2510349452495575, 'timestamp': '2025-10-01 04:22:32.794793', 'step': 4784, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:32.847158', 'step': 4784, 'epoch': 1} {'type': 'loss', 'content': 0.11908863484859467, 'timestamp': '2025-10-01 04:22:32.849832', 'step': 4785, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:32.903561', 'step': 4785, 'epoch': 1} {'type': 'loss', 'content': 0.17677326500415802, 'timestamp': '2025-10-01 04:22:32.905538', 'step': 4786, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:32.958505', 'step': 4786, 'epoch': 1} {'type': 'loss', 'content': 0.13113074004650116, 'timestamp': '2025-10-01 04:22:32.961026', 'step': 4787, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:33.016269', 'step': 4787, 'epoch': 1} {'type': 'loss', 'content': 0.13557010889053345, 'timestamp': '2025-10-01 04:22:33.022874', 'step': 4788, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:33.077957', 'step': 4788, 'epoch': 1} {'type': 'loss', 'content': 0.13931334018707275, 'timestamp': '2025-10-01 04:22:33.080207', 'step': 4789, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:33.133200', 'step': 4789, 'epoch': 1} {'type': 'loss', 'content': 0.1564028561115265, 'timestamp': '2025-10-01 04:22:33.135413', 'step': 4790, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:33.189062', 'step': 4790, 'epoch': 1} {'type': 'loss', 'content': 0.15160445868968964, 'timestamp': '2025-10-01 04:22:33.191107', 'step': 4791, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:33.244069', 'step': 4791, 'epoch': 1} {'type': 'loss', 'content': 0.260968416929245, 'timestamp': '2025-10-01 04:22:33.249869', 'step': 4792, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:33.303800', 'step': 4792, 'epoch': 1} {'type': 'loss', 'content': 0.27063095569610596, 'timestamp': '2025-10-01 04:22:33.306024', 'step': 4793, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:33.361600', 'step': 4793, 'epoch': 1} {'type': 'loss', 'content': 0.1680011749267578, 'timestamp': '2025-10-01 04:22:33.363692', 'step': 4794, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:33.416384', 'step': 4794, 'epoch': 1} {'type': 'loss', 'content': 0.19718924164772034, 'timestamp': '2025-10-01 04:22:33.418523', 'step': 4795, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:33.472016', 'step': 4795, 'epoch': 1} {'type': 'loss', 'content': 0.12359747290611267, 'timestamp': '2025-10-01 04:22:33.477928', 'step': 4796, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:22:33.531583', 'step': 4796, 'epoch': 1} {'type': 'loss', 'content': 0.1722850650548935, 'timestamp': '2025-10-01 04:22:33.534134', 'step': 4797, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:33.587583', 'step': 4797, 'epoch': 1} {'type': 'loss', 'content': 0.1400824785232544, 'timestamp': '2025-10-01 04:22:33.589694', 'step': 4798, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:33.643326', 'step': 4798, 'epoch': 1} {'type': 'loss', 'content': 0.1471555233001709, 'timestamp': '2025-10-01 04:22:33.648142', 'step': 4799, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:33.701876', 'step': 4799, 'epoch': 1} {'type': 'loss', 'content': 0.16255119442939758, 'timestamp': '2025-10-01 04:22:33.707669', 'step': 4800, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:33.778129', 'step': 4800, 'epoch': 1} {'type': 'loss', 'content': 0.13813777267932892, 'timestamp': '2025-10-01 04:22:33.781344', 'step': 4801, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:33.834466', 'step': 4801, 'epoch': 1} {'type': 'loss', 'content': 0.18231914937496185, 'timestamp': '2025-10-01 04:22:33.848353', 'step': 4802, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:33.912621', 'step': 4802, 'epoch': 1} {'type': 'loss', 'content': 0.16449324786663055, 'timestamp': '2025-10-01 04:22:33.914879', 'step': 4803, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:33.967258', 'step': 4803, 'epoch': 1} {'type': 'loss', 'content': 0.18680992722511292, 'timestamp': '2025-10-01 04:22:33.972985', 'step': 4804, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:34.026332', 'step': 4804, 'epoch': 1} {'type': 'loss', 'content': 0.2909584939479828, 'timestamp': '2025-10-01 04:22:34.029299', 'step': 4805, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:34.082828', 'step': 4805, 'epoch': 1} {'type': 'loss', 'content': 0.1696542203426361, 'timestamp': '2025-10-01 04:22:34.086250', 'step': 4806, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:34.140661', 'step': 4806, 'epoch': 1} {'type': 'loss', 'content': 0.09512164443731308, 'timestamp': '2025-10-01 04:22:34.142922', 'step': 4807, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:34.196742', 'step': 4807, 'epoch': 1} {'type': 'loss', 'content': 0.1622052788734436, 'timestamp': '2025-10-01 04:22:34.202495', 'step': 4808, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:34.254880', 'step': 4808, 'epoch': 1} {'type': 'loss', 'content': 0.177781343460083, 'timestamp': '2025-10-01 04:22:34.256968', 'step': 4809, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:22:34.310646', 'step': 4809, 'epoch': 1} {'type': 'loss', 'content': 0.17824485898017883, 'timestamp': '2025-10-01 04:22:34.312694', 'step': 4810, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:34.372699', 'step': 4810, 'epoch': 1} {'type': 'loss', 'content': 0.11478355526924133, 'timestamp': '2025-10-01 04:22:34.374954', 'step': 4811, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:34.428933', 'step': 4811, 'epoch': 1} {'type': 'loss', 'content': 0.19535566866397858, 'timestamp': '2025-10-01 04:22:34.434671', 'step': 4812, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:34.488252', 'step': 4812, 'epoch': 1} {'type': 'loss', 'content': 0.27087777853012085, 'timestamp': '2025-10-01 04:22:34.491225', 'step': 4813, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:34.548769', 'step': 4813, 'epoch': 1} {'type': 'loss', 'content': 0.2450191229581833, 'timestamp': '2025-10-01 04:22:34.550987', 'step': 4814, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:34.604418', 'step': 4814, 'epoch': 1} {'type': 'loss', 'content': 0.10196257382631302, 'timestamp': '2025-10-01 04:22:34.606406', 'step': 4815, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:34.659771', 'step': 4815, 'epoch': 1} {'type': 'loss', 'content': 0.17890428006649017, 'timestamp': '2025-10-01 04:22:34.675248', 'step': 4816, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:34.734722', 'step': 4816, 'epoch': 1} {'type': 'loss', 'content': 0.17132709920406342, 'timestamp': '2025-10-01 04:22:34.736781', 'step': 4817, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:34.789962', 'step': 4817, 'epoch': 1} {'type': 'loss', 'content': 0.13129457831382751, 'timestamp': '2025-10-01 04:22:34.791883', 'step': 4818, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:34.844417', 'step': 4818, 'epoch': 1} {'type': 'loss', 'content': 0.10077124089002609, 'timestamp': '2025-10-01 04:22:34.846563', 'step': 4819, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:34.899865', 'step': 4819, 'epoch': 1} {'type': 'loss', 'content': 0.15342774987220764, 'timestamp': '2025-10-01 04:22:34.905809', 'step': 4820, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:34.958632', 'step': 4820, 'epoch': 1} {'type': 'loss', 'content': 0.16364571452140808, 'timestamp': '2025-10-01 04:22:34.960961', 'step': 4821, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:35.013989', 'step': 4821, 'epoch': 1} {'type': 'loss', 'content': 0.19835548102855682, 'timestamp': '2025-10-01 04:22:35.016154', 'step': 4822, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:22:35.069463', 'step': 4822, 'epoch': 1} {'type': 'loss', 'content': 0.09841744601726532, 'timestamp': '2025-10-01 04:22:35.071866', 'step': 4823, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:35.125583', 'step': 4823, 'epoch': 1} {'type': 'loss', 'content': 0.11685498058795929, 'timestamp': '2025-10-01 04:22:35.131261', 'step': 4824, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:35.191134', 'step': 4824, 'epoch': 1} {'type': 'loss', 'content': 0.09875202924013138, 'timestamp': '2025-10-01 04:22:35.194079', 'step': 4825, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:35.249127', 'step': 4825, 'epoch': 1} {'type': 'loss', 'content': 0.10577121376991272, 'timestamp': '2025-10-01 04:22:35.251564', 'step': 4826, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:35.305947', 'step': 4826, 'epoch': 1} {'type': 'loss', 'content': 0.09434153139591217, 'timestamp': '2025-10-01 04:22:35.308109', 'step': 4827, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:35.362342', 'step': 4827, 'epoch': 1} {'type': 'loss', 'content': 0.12927386164665222, 'timestamp': '2025-10-01 04:22:35.368072', 'step': 4828, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:35.420445', 'step': 4828, 'epoch': 1} {'type': 'loss', 'content': 0.17267096042633057, 'timestamp': '2025-10-01 04:22:35.422811', 'step': 4829, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:35.476132', 'step': 4829, 'epoch': 1} {'type': 'loss', 'content': 0.15468239784240723, 'timestamp': '2025-10-01 04:22:35.478429', 'step': 4830, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:35.531994', 'step': 4830, 'epoch': 1} {'type': 'loss', 'content': 0.13829927146434784, 'timestamp': '2025-10-01 04:22:35.533983', 'step': 4831, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:35.586614', 'step': 4831, 'epoch': 1} {'type': 'loss', 'content': 0.20450055599212646, 'timestamp': '2025-10-01 04:22:35.592107', 'step': 4832, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:22:35.644927', 'step': 4832, 'epoch': 1} {'type': 'loss', 'content': 0.11338365077972412, 'timestamp': '2025-10-01 04:22:35.647299', 'step': 4833, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:35.700850', 'step': 4833, 'epoch': 1} {'type': 'loss', 'content': 0.18712909519672394, 'timestamp': '2025-10-01 04:22:35.702855', 'step': 4834, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:35.756229', 'step': 4834, 'epoch': 1} {'type': 'loss', 'content': 0.1049385815858841, 'timestamp': '2025-10-01 04:22:35.758409', 'step': 4835, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:35.815365', 'step': 4835, 'epoch': 1} {'type': 'loss', 'content': 0.13313862681388855, 'timestamp': '2025-10-01 04:22:35.821155', 'step': 4836, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:35.873907', 'step': 4836, 'epoch': 1} {'type': 'loss', 'content': 0.07942967116832733, 'timestamp': '2025-10-01 04:22:35.876068', 'step': 4837, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:35.930230', 'step': 4837, 'epoch': 1} {'type': 'loss', 'content': 0.1918172836303711, 'timestamp': '2025-10-01 04:22:35.932518', 'step': 4838, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:35.986518', 'step': 4838, 'epoch': 1} {'type': 'loss', 'content': 0.23986177146434784, 'timestamp': '2025-10-01 04:22:35.988387', 'step': 4839, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:36.041326', 'step': 4839, 'epoch': 1} {'type': 'loss', 'content': 0.140079528093338, 'timestamp': '2025-10-01 04:22:36.047740', 'step': 4840, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:36.100274', 'step': 4840, 'epoch': 1} {'type': 'loss', 'content': 0.13034957647323608, 'timestamp': '2025-10-01 04:22:36.102489', 'step': 4841, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:36.155263', 'step': 4841, 'epoch': 1} {'type': 'loss', 'content': 0.15595684945583344, 'timestamp': '2025-10-01 04:22:36.157577', 'step': 4842, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:36.210810', 'step': 4842, 'epoch': 1} {'type': 'loss', 'content': 0.2434656172990799, 'timestamp': '2025-10-01 04:22:36.213092', 'step': 4843, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:36.265579', 'step': 4843, 'epoch': 1} {'type': 'loss', 'content': 0.13274642825126648, 'timestamp': '2025-10-01 04:22:36.271896', 'step': 4844, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:36.327763', 'step': 4844, 'epoch': 1} {'type': 'loss', 'content': 0.153814435005188, 'timestamp': '2025-10-01 04:22:36.329860', 'step': 4845, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:36.382932', 'step': 4845, 'epoch': 1} {'type': 'loss', 'content': 0.07840336859226227, 'timestamp': '2025-10-01 04:22:36.385190', 'step': 4846, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:36.446352', 'step': 4846, 'epoch': 1} {'type': 'loss', 'content': 0.16656364500522614, 'timestamp': '2025-10-01 04:22:36.449179', 'step': 4847, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:36.503605', 'step': 4847, 'epoch': 1} {'type': 'loss', 'content': 0.16786016523838043, 'timestamp': '2025-10-01 04:22:36.534558', 'step': 4848, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:36.592281', 'step': 4848, 'epoch': 1} {'type': 'loss', 'content': 0.2702639400959015, 'timestamp': '2025-10-01 04:22:36.594293', 'step': 4849, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:36.648504', 'step': 4849, 'epoch': 1} {'type': 'loss', 'content': 0.12767435610294342, 'timestamp': '2025-10-01 04:22:36.650474', 'step': 4850, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:36.703463', 'step': 4850, 'epoch': 1} {'type': 'loss', 'content': 0.13254371285438538, 'timestamp': '2025-10-01 04:22:36.705541', 'step': 4851, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:36.760083', 'step': 4851, 'epoch': 1} {'type': 'loss', 'content': 0.20344552397727966, 'timestamp': '2025-10-01 04:22:36.765932', 'step': 4852, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:36.818998', 'step': 4852, 'epoch': 1} {'type': 'loss', 'content': 0.13659071922302246, 'timestamp': '2025-10-01 04:22:36.821164', 'step': 4853, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:36.874539', 'step': 4853, 'epoch': 1} {'type': 'loss', 'content': 0.14643332362174988, 'timestamp': '2025-10-01 04:22:36.876755', 'step': 4854, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:36.939212', 'step': 4854, 'epoch': 1} {'type': 'loss', 'content': 0.17740660905838013, 'timestamp': '2025-10-01 04:22:36.941415', 'step': 4855, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:36.994332', 'step': 4855, 'epoch': 1} {'type': 'loss', 'content': 0.14683988690376282, 'timestamp': '2025-10-01 04:22:37.000054', 'step': 4856, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:22:37.052173', 'step': 4856, 'epoch': 1} {'type': 'loss', 'content': 0.07363675534725189, 'timestamp': '2025-10-01 04:22:37.054603', 'step': 4857, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:37.108599', 'step': 4857, 'epoch': 1} {'type': 'loss', 'content': 0.35506510734558105, 'timestamp': '2025-10-01 04:22:37.110745', 'step': 4858, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:37.164064', 'step': 4858, 'epoch': 1} {'type': 'loss', 'content': 0.21609674394130707, 'timestamp': '2025-10-01 04:22:37.166209', 'step': 4859, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:37.219401', 'step': 4859, 'epoch': 1} {'type': 'loss', 'content': 0.12576189637184143, 'timestamp': '2025-10-01 04:22:37.225222', 'step': 4860, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:37.278085', 'step': 4860, 'epoch': 1} {'type': 'loss', 'content': 0.2271985560655594, 'timestamp': '2025-10-01 04:22:37.280240', 'step': 4861, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:37.332703', 'step': 4861, 'epoch': 1} {'type': 'loss', 'content': 0.15036246180534363, 'timestamp': '2025-10-01 04:22:37.335012', 'step': 4862, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:37.388289', 'step': 4862, 'epoch': 1} {'type': 'loss', 'content': 0.2907288074493408, 'timestamp': '2025-10-01 04:22:37.390639', 'step': 4863, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:37.443825', 'step': 4863, 'epoch': 1} {'type': 'loss', 'content': 0.09577450901269913, 'timestamp': '2025-10-01 04:22:37.449331', 'step': 4864, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:37.502004', 'step': 4864, 'epoch': 1} {'type': 'loss', 'content': 0.12875640392303467, 'timestamp': '2025-10-01 04:22:37.504093', 'step': 4865, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:22:37.557248', 'step': 4865, 'epoch': 1} {'type': 'loss', 'content': 0.21853405237197876, 'timestamp': '2025-10-01 04:22:37.559491', 'step': 4866, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:37.612743', 'step': 4866, 'epoch': 1} {'type': 'loss', 'content': 0.1948552429676056, 'timestamp': '2025-10-01 04:22:37.614972', 'step': 4867, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:37.669441', 'step': 4867, 'epoch': 1} {'type': 'loss', 'content': 0.19823245704174042, 'timestamp': '2025-10-01 04:22:37.675210', 'step': 4868, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:37.727800', 'step': 4868, 'epoch': 1} {'type': 'loss', 'content': 0.2703702449798584, 'timestamp': '2025-10-01 04:22:37.730028', 'step': 4869, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:37.783153', 'step': 4869, 'epoch': 1} {'type': 'loss', 'content': 0.21270865201950073, 'timestamp': '2025-10-01 04:22:37.785426', 'step': 4870, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:37.848799', 'step': 4870, 'epoch': 1} {'type': 'loss', 'content': 0.15511327981948853, 'timestamp': '2025-10-01 04:22:37.851147', 'step': 4871, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:37.904583', 'step': 4871, 'epoch': 1} {'type': 'loss', 'content': 0.13119256496429443, 'timestamp': '2025-10-01 04:22:37.910544', 'step': 4872, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:37.963166', 'step': 4872, 'epoch': 1} {'type': 'loss', 'content': 0.128327876329422, 'timestamp': '2025-10-01 04:22:37.966158', 'step': 4873, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:38.019313', 'step': 4873, 'epoch': 1} {'type': 'loss', 'content': 0.2108079493045807, 'timestamp': '2025-10-01 04:22:38.022021', 'step': 4874, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:38.076184', 'step': 4874, 'epoch': 1} {'type': 'loss', 'content': 0.1756681203842163, 'timestamp': '2025-10-01 04:22:38.078453', 'step': 4875, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:38.131911', 'step': 4875, 'epoch': 1} {'type': 'loss', 'content': 0.15734201669692993, 'timestamp': '2025-10-01 04:22:38.137741', 'step': 4876, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:38.190521', 'step': 4876, 'epoch': 1} {'type': 'loss', 'content': 0.18088975548744202, 'timestamp': '2025-10-01 04:22:38.192903', 'step': 4877, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:38.245957', 'step': 4877, 'epoch': 1} {'type': 'loss', 'content': 0.1059161052107811, 'timestamp': '2025-10-01 04:22:38.248232', 'step': 4878, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:38.301785', 'step': 4878, 'epoch': 1} {'type': 'loss', 'content': 0.24467848241329193, 'timestamp': '2025-10-01 04:22:38.304013', 'step': 4879, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:38.359203', 'step': 4879, 'epoch': 1} {'type': 'loss', 'content': 0.13733801245689392, 'timestamp': '2025-10-01 04:22:38.365272', 'step': 4880, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:38.418970', 'step': 4880, 'epoch': 1} {'type': 'loss', 'content': 0.21691280603408813, 'timestamp': '2025-10-01 04:22:38.421498', 'step': 4881, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:38.475578', 'step': 4881, 'epoch': 1} {'type': 'loss', 'content': 0.1959628015756607, 'timestamp': '2025-10-01 04:22:38.478105', 'step': 4882, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:38.532524', 'step': 4882, 'epoch': 1} {'type': 'loss', 'content': 0.11792771518230438, 'timestamp': '2025-10-01 04:22:38.535085', 'step': 4883, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:38.588661', 'step': 4883, 'epoch': 1} {'type': 'loss', 'content': 0.156577005982399, 'timestamp': '2025-10-01 04:22:38.595615', 'step': 4884, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:38.655618', 'step': 4884, 'epoch': 1} {'type': 'loss', 'content': 0.1159721091389656, 'timestamp': '2025-10-01 04:22:38.659079', 'step': 4885, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:38.721415', 'step': 4885, 'epoch': 1} {'type': 'loss', 'content': 0.1975756734609604, 'timestamp': '2025-10-01 04:22:38.724257', 'step': 4886, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:38.783667', 'step': 4886, 'epoch': 1} {'type': 'loss', 'content': 0.18104124069213867, 'timestamp': '2025-10-01 04:22:38.786157', 'step': 4887, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:38.840163', 'step': 4887, 'epoch': 1} {'type': 'loss', 'content': 0.16950595378875732, 'timestamp': '2025-10-01 04:22:38.846163', 'step': 4888, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:38.900786', 'step': 4888, 'epoch': 1} {'type': 'loss', 'content': 0.2104482501745224, 'timestamp': '2025-10-01 04:22:38.903450', 'step': 4889, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:38.958141', 'step': 4889, 'epoch': 1} {'type': 'loss', 'content': 0.2046477496623993, 'timestamp': '2025-10-01 04:22:38.961776', 'step': 4890, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:39.016207', 'step': 4890, 'epoch': 1} {'type': 'loss', 'content': 0.1697826087474823, 'timestamp': '2025-10-01 04:22:39.018762', 'step': 4891, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:39.072563', 'step': 4891, 'epoch': 1} {'type': 'loss', 'content': 0.13120710849761963, 'timestamp': '2025-10-01 04:22:39.078754', 'step': 4892, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:39.132564', 'step': 4892, 'epoch': 1} {'type': 'loss', 'content': 0.21809637546539307, 'timestamp': '2025-10-01 04:22:39.135020', 'step': 4893, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:39.189688', 'step': 4893, 'epoch': 1} {'type': 'loss', 'content': 0.20051638782024384, 'timestamp': '2025-10-01 04:22:39.192462', 'step': 4894, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:39.247315', 'step': 4894, 'epoch': 1} {'type': 'loss', 'content': 0.25320136547088623, 'timestamp': '2025-10-01 04:22:39.250032', 'step': 4895, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:39.310162', 'step': 4895, 'epoch': 1} {'type': 'loss', 'content': 0.12248078733682632, 'timestamp': '2025-10-01 04:22:39.316459', 'step': 4896, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:22:39.369718', 'step': 4896, 'epoch': 1} {'type': 'loss', 'content': 0.17083758115768433, 'timestamp': '2025-10-01 04:22:39.372354', 'step': 4897, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:39.433238', 'step': 4897, 'epoch': 1} {'type': 'loss', 'content': 0.09530038386583328, 'timestamp': '2025-10-01 04:22:39.435922', 'step': 4898, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:39.489864', 'step': 4898, 'epoch': 1} {'type': 'loss', 'content': 0.1950942575931549, 'timestamp': '2025-10-01 04:22:39.492533', 'step': 4899, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:39.551134', 'step': 4899, 'epoch': 1} {'type': 'loss', 'content': 0.20375895500183105, 'timestamp': '2025-10-01 04:22:39.557448', 'step': 4900, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:39.610995', 'step': 4900, 'epoch': 1} {'type': 'loss', 'content': 0.13044871389865875, 'timestamp': '2025-10-01 04:22:39.613363', 'step': 4901, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:39.668178', 'step': 4901, 'epoch': 1} {'type': 'loss', 'content': 0.17695488035678864, 'timestamp': '2025-10-01 04:22:39.670808', 'step': 4902, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:39.725495', 'step': 4902, 'epoch': 1} {'type': 'loss', 'content': 0.1649055927991867, 'timestamp': '2025-10-01 04:22:39.727707', 'step': 4903, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:39.781189', 'step': 4903, 'epoch': 1} {'type': 'loss', 'content': 0.1697259396314621, 'timestamp': '2025-10-01 04:22:39.787089', 'step': 4904, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:39.841443', 'step': 4904, 'epoch': 1} {'type': 'loss', 'content': 0.15717661380767822, 'timestamp': '2025-10-01 04:22:39.843780', 'step': 4905, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:39.896519', 'step': 4905, 'epoch': 1} {'type': 'loss', 'content': 0.15423767268657684, 'timestamp': '2025-10-01 04:22:39.898822', 'step': 4906, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:39.952330', 'step': 4906, 'epoch': 1} {'type': 'loss', 'content': 0.13947807252407074, 'timestamp': '2025-10-01 04:22:39.954582', 'step': 4907, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:40.008232', 'step': 4907, 'epoch': 1} {'type': 'loss', 'content': 0.1548607498407364, 'timestamp': '2025-10-01 04:22:40.014153', 'step': 4908, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:40.069279', 'step': 4908, 'epoch': 1} {'type': 'loss', 'content': 0.15676367282867432, 'timestamp': '2025-10-01 04:22:40.071596', 'step': 4909, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:40.124422', 'step': 4909, 'epoch': 1} {'type': 'loss', 'content': 0.10162080079317093, 'timestamp': '2025-10-01 04:22:40.126777', 'step': 4910, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:40.180757', 'step': 4910, 'epoch': 1} {'type': 'loss', 'content': 0.22836431860923767, 'timestamp': '2025-10-01 04:22:40.183273', 'step': 4911, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:40.242164', 'step': 4911, 'epoch': 1} {'type': 'loss', 'content': 0.19928090274333954, 'timestamp': '2025-10-01 04:22:40.247992', 'step': 4912, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:40.300602', 'step': 4912, 'epoch': 1} {'type': 'loss', 'content': 0.1461719274520874, 'timestamp': '2025-10-01 04:22:40.302694', 'step': 4913, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:40.355910', 'step': 4913, 'epoch': 1} {'type': 'loss', 'content': 0.16394375264644623, 'timestamp': '2025-10-01 04:22:40.358320', 'step': 4914, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:40.411931', 'step': 4914, 'epoch': 1} {'type': 'loss', 'content': 0.18709830939769745, 'timestamp': '2025-10-01 04:22:40.414419', 'step': 4915, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:40.467750', 'step': 4915, 'epoch': 1} {'type': 'loss', 'content': 0.13175317645072937, 'timestamp': '2025-10-01 04:22:40.473982', 'step': 4916, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:40.526815', 'step': 4916, 'epoch': 1} {'type': 'loss', 'content': 0.16254936158657074, 'timestamp': '2025-10-01 04:22:40.529201', 'step': 4917, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:40.582183', 'step': 4917, 'epoch': 1} {'type': 'loss', 'content': 0.12156709283590317, 'timestamp': '2025-10-01 04:22:40.584465', 'step': 4918, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:40.638224', 'step': 4918, 'epoch': 1} {'type': 'loss', 'content': 0.1443706899881363, 'timestamp': '2025-10-01 04:22:40.640413', 'step': 4919, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:40.693549', 'step': 4919, 'epoch': 1} {'type': 'loss', 'content': 0.13514980673789978, 'timestamp': '2025-10-01 04:22:40.710534', 'step': 4920, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:40.775351', 'step': 4920, 'epoch': 1} {'type': 'loss', 'content': 0.16328813135623932, 'timestamp': '2025-10-01 04:22:40.777554', 'step': 4921, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:40.842993', 'step': 4921, 'epoch': 1} {'type': 'loss', 'content': 0.1133192703127861, 'timestamp': '2025-10-01 04:22:40.845334', 'step': 4922, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:40.899229', 'step': 4922, 'epoch': 1} {'type': 'loss', 'content': 0.19773076474666595, 'timestamp': '2025-10-01 04:22:40.901295', 'step': 4923, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:40.954716', 'step': 4923, 'epoch': 1} {'type': 'loss', 'content': 0.13094983994960785, 'timestamp': '2025-10-01 04:22:40.960853', 'step': 4924, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:41.014365', 'step': 4924, 'epoch': 1} {'type': 'loss', 'content': 0.1331387609243393, 'timestamp': '2025-10-01 04:22:41.016553', 'step': 4925, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:41.070675', 'step': 4925, 'epoch': 1} {'type': 'loss', 'content': 0.15711653232574463, 'timestamp': '2025-10-01 04:22:41.072970', 'step': 4926, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:41.131437', 'step': 4926, 'epoch': 1} {'type': 'loss', 'content': 0.27190712094306946, 'timestamp': '2025-10-01 04:22:41.133816', 'step': 4927, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:41.187066', 'step': 4927, 'epoch': 1} {'type': 'loss', 'content': 0.16893254220485687, 'timestamp': '2025-10-01 04:22:41.193166', 'step': 4928, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:41.252944', 'step': 4928, 'epoch': 1} {'type': 'loss', 'content': 0.20134998857975006, 'timestamp': '2025-10-01 04:22:41.255483', 'step': 4929, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:41.308906', 'step': 4929, 'epoch': 1} {'type': 'loss', 'content': 0.13605521619319916, 'timestamp': '2025-10-01 04:22:41.321265', 'step': 4930, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:41.384428', 'step': 4930, 'epoch': 1} {'type': 'loss', 'content': 0.1259852647781372, 'timestamp': '2025-10-01 04:22:41.387812', 'step': 4931, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:41.447988', 'step': 4931, 'epoch': 1} {'type': 'loss', 'content': 0.13669048249721527, 'timestamp': '2025-10-01 04:22:41.455497', 'step': 4932, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:41.525491', 'step': 4932, 'epoch': 1} {'type': 'loss', 'content': 0.0951194316148758, 'timestamp': '2025-10-01 04:22:41.529861', 'step': 4933, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:41.591022', 'step': 4933, 'epoch': 1} {'type': 'loss', 'content': 0.16629883646965027, 'timestamp': '2025-10-01 04:22:41.593457', 'step': 4934, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:41.659253', 'step': 4934, 'epoch': 1} {'type': 'loss', 'content': 0.15461744368076324, 'timestamp': '2025-10-01 04:22:41.665280', 'step': 4935, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:41.731656', 'step': 4935, 'epoch': 1} {'type': 'loss', 'content': 0.1967397779226303, 'timestamp': '2025-10-01 04:22:41.752223', 'step': 4936, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:41.806119', 'step': 4936, 'epoch': 1} {'type': 'loss', 'content': 0.174015611410141, 'timestamp': '2025-10-01 04:22:41.809039', 'step': 4937, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:41.873644', 'step': 4937, 'epoch': 1} {'type': 'loss', 'content': 0.15584847331047058, 'timestamp': '2025-10-01 04:22:41.880229', 'step': 4938, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:41.938132', 'step': 4938, 'epoch': 1} {'type': 'loss', 'content': 0.17813605070114136, 'timestamp': '2025-10-01 04:22:41.946743', 'step': 4939, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:42.007784', 'step': 4939, 'epoch': 1} {'type': 'loss', 'content': 0.1111038476228714, 'timestamp': '2025-10-01 04:22:42.014806', 'step': 4940, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:42.074668', 'step': 4940, 'epoch': 1} {'type': 'loss', 'content': 0.13220104575157166, 'timestamp': '2025-10-01 04:22:42.092486', 'step': 4941, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:42.171177', 'step': 4941, 'epoch': 1} {'type': 'loss', 'content': 0.23068775236606598, 'timestamp': '2025-10-01 04:22:42.176344', 'step': 4942, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:42.232050', 'step': 4942, 'epoch': 1} {'type': 'loss', 'content': 0.12516599893569946, 'timestamp': '2025-10-01 04:22:42.235679', 'step': 4943, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:42.292587', 'step': 4943, 'epoch': 1} {'type': 'loss', 'content': 0.27252325415611267, 'timestamp': '2025-10-01 04:22:42.303078', 'step': 4944, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:42.370789', 'step': 4944, 'epoch': 1} {'type': 'loss', 'content': 0.15215760469436646, 'timestamp': '2025-10-01 04:22:42.380752', 'step': 4945, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:22:42.464266', 'step': 4945, 'epoch': 1} {'type': 'loss', 'content': 0.3263638913631439, 'timestamp': '2025-10-01 04:22:42.476157', 'step': 4946, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:42.540683', 'step': 4946, 'epoch': 1} {'type': 'loss', 'content': 0.20296941697597504, 'timestamp': '2025-10-01 04:22:42.547500', 'step': 4947, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:42.608830', 'step': 4947, 'epoch': 1} {'type': 'loss', 'content': 0.17061570286750793, 'timestamp': '2025-10-01 04:22:42.619660', 'step': 4948, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:42.675885', 'step': 4948, 'epoch': 1} {'type': 'loss', 'content': 0.18993543088436127, 'timestamp': '2025-10-01 04:22:42.678030', 'step': 4949, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:42.742234', 'step': 4949, 'epoch': 1} {'type': 'loss', 'content': 0.10382970422506332, 'timestamp': '2025-10-01 04:22:42.744473', 'step': 4950, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:42.798345', 'step': 4950, 'epoch': 1} {'type': 'loss', 'content': 0.14695893228054047, 'timestamp': '2025-10-01 04:22:42.800380', 'step': 4951, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:42.853208', 'step': 4951, 'epoch': 1} {'type': 'loss', 'content': 0.16149941086769104, 'timestamp': '2025-10-01 04:22:42.858903', 'step': 4952, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:42.911372', 'step': 4952, 'epoch': 1} {'type': 'loss', 'content': 0.15558598935604095, 'timestamp': '2025-10-01 04:22:42.913429', 'step': 4953, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:42.967305', 'step': 4953, 'epoch': 1} {'type': 'loss', 'content': 0.1396436244249344, 'timestamp': '2025-10-01 04:22:42.970002', 'step': 4954, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:43.024344', 'step': 4954, 'epoch': 1} {'type': 'loss', 'content': 0.1480076164007187, 'timestamp': '2025-10-01 04:22:43.026580', 'step': 4955, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:43.079879', 'step': 4955, 'epoch': 1} {'type': 'loss', 'content': 0.2651989758014679, 'timestamp': '2025-10-01 04:22:43.085668', 'step': 4956, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:22:43.139211', 'step': 4956, 'epoch': 1} {'type': 'loss', 'content': 0.12048943340778351, 'timestamp': '2025-10-01 04:22:43.141608', 'step': 4957, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:43.194827', 'step': 4957, 'epoch': 1} {'type': 'loss', 'content': 0.1564963310956955, 'timestamp': '2025-10-01 04:22:43.197189', 'step': 4958, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:43.251712', 'step': 4958, 'epoch': 1} {'type': 'loss', 'content': 0.20537018775939941, 'timestamp': '2025-10-01 04:22:43.253998', 'step': 4959, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:43.308388', 'step': 4959, 'epoch': 1} {'type': 'loss', 'content': 0.1423422396183014, 'timestamp': '2025-10-01 04:22:43.314196', 'step': 4960, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:43.367159', 'step': 4960, 'epoch': 1} {'type': 'loss', 'content': 0.14411424100399017, 'timestamp': '2025-10-01 04:22:43.369324', 'step': 4961, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:43.422507', 'step': 4961, 'epoch': 1} {'type': 'loss', 'content': 0.18823948502540588, 'timestamp': '2025-10-01 04:22:43.424593', 'step': 4962, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:43.477659', 'step': 4962, 'epoch': 1} {'type': 'loss', 'content': 0.13236334919929504, 'timestamp': '2025-10-01 04:22:43.479776', 'step': 4963, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:43.533525', 'step': 4963, 'epoch': 1} {'type': 'loss', 'content': 0.12406344711780548, 'timestamp': '2025-10-01 04:22:43.539774', 'step': 4964, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:43.592427', 'step': 4964, 'epoch': 1} {'type': 'loss', 'content': 0.1160912960767746, 'timestamp': '2025-10-01 04:22:43.594912', 'step': 4965, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:43.665421', 'step': 4965, 'epoch': 1} {'type': 'loss', 'content': 0.19765734672546387, 'timestamp': '2025-10-01 04:22:43.667672', 'step': 4966, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:43.722843', 'step': 4966, 'epoch': 1} {'type': 'loss', 'content': 0.18300823867321014, 'timestamp': '2025-10-01 04:22:43.725251', 'step': 4967, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:43.781034', 'step': 4967, 'epoch': 1} {'type': 'loss', 'content': 0.08804737031459808, 'timestamp': '2025-10-01 04:22:43.786936', 'step': 4968, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:22:43.839358', 'step': 4968, 'epoch': 1} {'type': 'loss', 'content': 0.23068951070308685, 'timestamp': '2025-10-01 04:22:43.841689', 'step': 4969, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:43.905638', 'step': 4969, 'epoch': 1} {'type': 'loss', 'content': 0.1570780873298645, 'timestamp': '2025-10-01 04:22:43.907889', 'step': 4970, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:43.973260', 'step': 4970, 'epoch': 1} {'type': 'loss', 'content': 0.19560635089874268, 'timestamp': '2025-10-01 04:22:43.975489', 'step': 4971, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:44.029804', 'step': 4971, 'epoch': 1} {'type': 'loss', 'content': 0.11342856287956238, 'timestamp': '2025-10-01 04:22:44.035904', 'step': 4972, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:44.088905', 'step': 4972, 'epoch': 1} {'type': 'loss', 'content': 0.16936782002449036, 'timestamp': '2025-10-01 04:22:44.091406', 'step': 4973, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:44.144398', 'step': 4973, 'epoch': 1} {'type': 'loss', 'content': 0.2258220762014389, 'timestamp': '2025-10-01 04:22:44.146567', 'step': 4974, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:44.200516', 'step': 4974, 'epoch': 1} {'type': 'loss', 'content': 0.115826316177845, 'timestamp': '2025-10-01 04:22:44.202724', 'step': 4975, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:22:44.255589', 'step': 4975, 'epoch': 1} {'type': 'loss', 'content': 0.09852095693349838, 'timestamp': '2025-10-01 04:22:44.261455', 'step': 4976, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:44.314081', 'step': 4976, 'epoch': 1} {'type': 'loss', 'content': 0.16163010895252228, 'timestamp': '2025-10-01 04:22:44.316223', 'step': 4977, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:22:44.370351', 'step': 4977, 'epoch': 1} {'type': 'loss', 'content': 0.14066831767559052, 'timestamp': '2025-10-01 04:22:44.373542', 'step': 4978, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:44.427564', 'step': 4978, 'epoch': 1} {'type': 'loss', 'content': 0.2724147439002991, 'timestamp': '2025-10-01 04:22:44.429966', 'step': 4979, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:44.483558', 'step': 4979, 'epoch': 1} {'type': 'loss', 'content': 0.12999171018600464, 'timestamp': '2025-10-01 04:22:44.489437', 'step': 4980, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:44.542151', 'step': 4980, 'epoch': 1} {'type': 'loss', 'content': 0.1529327630996704, 'timestamp': '2025-10-01 04:22:44.550360', 'step': 4981, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:22:44.617121', 'step': 4981, 'epoch': 1} {'type': 'loss', 'content': 0.1204814538359642, 'timestamp': '2025-10-01 04:22:44.624611', 'step': 4982, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:44.716966', 'step': 4982, 'epoch': 1} {'type': 'loss', 'content': 0.10374941676855087, 'timestamp': '2025-10-01 04:22:44.719439', 'step': 4983, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:44.781345', 'step': 4983, 'epoch': 1} {'type': 'loss', 'content': 0.20693853497505188, 'timestamp': '2025-10-01 04:22:44.799795', 'step': 4984, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:44.871651', 'step': 4984, 'epoch': 1} {'type': 'loss', 'content': 0.23103195428848267, 'timestamp': '2025-10-01 04:22:44.876942', 'step': 4985, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:44.939552', 'step': 4985, 'epoch': 1} {'type': 'loss', 'content': 0.18070051074028015, 'timestamp': '2025-10-01 04:22:44.949410', 'step': 4986, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:22:45.023075', 'step': 4986, 'epoch': 1} {'type': 'loss', 'content': 0.15676121413707733, 'timestamp': '2025-10-01 04:22:45.028969', 'step': 4987, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:45.097853', 'step': 4987, 'epoch': 1} {'type': 'loss', 'content': 0.12273894250392914, 'timestamp': '2025-10-01 04:22:45.107047', 'step': 4988, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:45.172723', 'step': 4988, 'epoch': 1} {'type': 'loss', 'content': 0.10950732231140137, 'timestamp': '2025-10-01 04:22:45.180713', 'step': 4989, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:45.244646', 'step': 4989, 'epoch': 1} {'type': 'loss', 'content': 0.13620613515377045, 'timestamp': '2025-10-01 04:22:45.255665', 'step': 4990, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:45.348742', 'step': 4990, 'epoch': 1} {'type': 'loss', 'content': 0.2229163497686386, 'timestamp': '2025-10-01 04:22:45.360032', 'step': 4991, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:22:45.428271', 'step': 4991, 'epoch': 1} {'type': 'loss', 'content': 0.21095633506774902, 'timestamp': '2025-10-01 04:22:45.441180', 'step': 4992, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:45.518728', 'step': 4992, 'epoch': 1} {'type': 'loss', 'content': 0.15583829581737518, 'timestamp': '2025-10-01 04:22:45.526218', 'step': 4993, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:45.595424', 'step': 4993, 'epoch': 1} {'type': 'loss', 'content': 0.2493925839662552, 'timestamp': '2025-10-01 04:22:45.606842', 'step': 4994, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:45.678846', 'step': 4994, 'epoch': 1} {'type': 'loss', 'content': 0.151719868183136, 'timestamp': '2025-10-01 04:22:45.683420', 'step': 4995, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:45.759064', 'step': 4995, 'epoch': 1} {'type': 'loss', 'content': 0.1381971538066864, 'timestamp': '2025-10-01 04:22:45.771957', 'step': 4996, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:45.829366', 'step': 4996, 'epoch': 1} {'type': 'loss', 'content': 0.162946879863739, 'timestamp': '2025-10-01 04:22:45.842894', 'step': 4997, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:45.903336', 'step': 4997, 'epoch': 1} {'type': 'loss', 'content': 0.15652702748775482, 'timestamp': '2025-10-01 04:22:45.905670', 'step': 4998, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:45.964597', 'step': 4998, 'epoch': 1} {'type': 'loss', 'content': 0.22641338407993317, 'timestamp': '2025-10-01 04:22:45.966884', 'step': 4999, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:22:46.022217', 'step': 4999, 'epoch': 1} {'type': 'loss', 'content': 0.14087772369384766, 'timestamp': '2025-10-01 04:22:46.029173', 'step': 5000, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 5000', 'timestamp': '2025-10-01 04:22:46.407822', 'step': 5000, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:46.466590', 'step': 5000, 'epoch': 1} {'type': 'loss', 'content': 0.1332717388868332, 'timestamp': '2025-10-01 04:22:46.468799', 'step': 5001, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:46.525614', 'step': 5001, 'epoch': 1} {'type': 'loss', 'content': 0.12158838659524918, 'timestamp': '2025-10-01 04:22:46.527697', 'step': 5002, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:46.581621', 'step': 5002, 'epoch': 1} {'type': 'loss', 'content': 0.15568961203098297, 'timestamp': '2025-10-01 04:22:46.584966', 'step': 5003, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:46.640258', 'step': 5003, 'epoch': 1} {'type': 'loss', 'content': 0.1470358967781067, 'timestamp': '2025-10-01 04:22:46.645790', 'step': 5004, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:46.699162', 'step': 5004, 'epoch': 1} {'type': 'loss', 'content': 0.19641521573066711, 'timestamp': '2025-10-01 04:22:46.700792', 'step': 5005, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:46.757848', 'step': 5005, 'epoch': 1} {'type': 'loss', 'content': 0.19357894361019135, 'timestamp': '2025-10-01 04:22:46.759551', 'step': 5006, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:46.812717', 'step': 5006, 'epoch': 1} {'type': 'loss', 'content': 0.1582842767238617, 'timestamp': '2025-10-01 04:22:46.814815', 'step': 5007, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:46.867426', 'step': 5007, 'epoch': 1} {'type': 'loss', 'content': 0.1674448549747467, 'timestamp': '2025-10-01 04:22:46.873498', 'step': 5008, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:22:46.926147', 'step': 5008, 'epoch': 1} {'type': 'loss', 'content': 0.18751396238803864, 'timestamp': '2025-10-01 04:22:46.928176', 'step': 5009, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:46.981648', 'step': 5009, 'epoch': 1} {'type': 'loss', 'content': 0.15981647372245789, 'timestamp': '2025-10-01 04:22:46.983881', 'step': 5010, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:47.037254', 'step': 5010, 'epoch': 1} {'type': 'loss', 'content': 0.20445260405540466, 'timestamp': '2025-10-01 04:22:47.039658', 'step': 5011, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:47.092906', 'step': 5011, 'epoch': 1} {'type': 'loss', 'content': 0.16967260837554932, 'timestamp': '2025-10-01 04:22:47.098989', 'step': 5012, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:47.151740', 'step': 5012, 'epoch': 1} {'type': 'loss', 'content': 0.17235778272151947, 'timestamp': '2025-10-01 04:22:47.153564', 'step': 5013, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:47.208912', 'step': 5013, 'epoch': 1} {'type': 'loss', 'content': 0.14890028536319733, 'timestamp': '2025-10-01 04:22:47.211094', 'step': 5014, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:47.269469', 'step': 5014, 'epoch': 1} {'type': 'loss', 'content': 0.11926106363534927, 'timestamp': '2025-10-01 04:22:47.272038', 'step': 5015, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:47.330571', 'step': 5015, 'epoch': 1} {'type': 'loss', 'content': 0.14598390460014343, 'timestamp': '2025-10-01 04:22:47.337508', 'step': 5016, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:47.395162', 'step': 5016, 'epoch': 1} {'type': 'loss', 'content': 0.2317771464586258, 'timestamp': '2025-10-01 04:22:47.397541', 'step': 5017, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:47.462017', 'step': 5017, 'epoch': 1} {'type': 'loss', 'content': 0.1902623474597931, 'timestamp': '2025-10-01 04:22:47.464338', 'step': 5018, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:47.522393', 'step': 5018, 'epoch': 1} {'type': 'loss', 'content': 0.13103386759757996, 'timestamp': '2025-10-01 04:22:47.524421', 'step': 5019, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:47.580767', 'step': 5019, 'epoch': 1} {'type': 'loss', 'content': 0.1995166391134262, 'timestamp': '2025-10-01 04:22:47.587179', 'step': 5020, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:47.642829', 'step': 5020, 'epoch': 1} {'type': 'loss', 'content': 0.09291680157184601, 'timestamp': '2025-10-01 04:22:47.645183', 'step': 5021, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:47.704436', 'step': 5021, 'epoch': 1} {'type': 'loss', 'content': 0.15789130330085754, 'timestamp': '2025-10-01 04:22:47.706639', 'step': 5022, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:47.762310', 'step': 5022, 'epoch': 1} {'type': 'loss', 'content': 0.10611231625080109, 'timestamp': '2025-10-01 04:22:47.764607', 'step': 5023, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:47.820559', 'step': 5023, 'epoch': 1} {'type': 'loss', 'content': 0.14930017292499542, 'timestamp': '2025-10-01 04:22:47.827228', 'step': 5024, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:47.881870', 'step': 5024, 'epoch': 1} {'type': 'loss', 'content': 0.1152317002415657, 'timestamp': '2025-10-01 04:22:47.883662', 'step': 5025, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:22:47.937057', 'step': 5025, 'epoch': 1} {'type': 'loss', 'content': 0.2048969268798828, 'timestamp': '2025-10-01 04:22:47.939289', 'step': 5026, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:47.993790', 'step': 5026, 'epoch': 1} {'type': 'loss', 'content': 0.2343476116657257, 'timestamp': '2025-10-01 04:22:47.995830', 'step': 5027, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:48.047904', 'step': 5027, 'epoch': 1} {'type': 'loss', 'content': 0.21370144188404083, 'timestamp': '2025-10-01 04:22:48.054086', 'step': 5028, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:48.107719', 'step': 5028, 'epoch': 1} {'type': 'loss', 'content': 0.16687044501304626, 'timestamp': '2025-10-01 04:22:48.109830', 'step': 5029, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:48.163149', 'step': 5029, 'epoch': 1} {'type': 'loss', 'content': 0.16736429929733276, 'timestamp': '2025-10-01 04:22:48.165602', 'step': 5030, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:48.218221', 'step': 5030, 'epoch': 1} {'type': 'loss', 'content': 0.1360073834657669, 'timestamp': '2025-10-01 04:22:48.220521', 'step': 5031, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:48.273711', 'step': 5031, 'epoch': 1} {'type': 'loss', 'content': 0.13653644919395447, 'timestamp': '2025-10-01 04:22:48.279283', 'step': 5032, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:48.332523', 'step': 5032, 'epoch': 1} {'type': 'loss', 'content': 0.18764635920524597, 'timestamp': '2025-10-01 04:22:48.334457', 'step': 5033, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:48.387632', 'step': 5033, 'epoch': 1} {'type': 'loss', 'content': 0.16639290750026703, 'timestamp': '2025-10-01 04:22:48.389488', 'step': 5034, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:48.442130', 'step': 5034, 'epoch': 1} {'type': 'loss', 'content': 0.11040680855512619, 'timestamp': '2025-10-01 04:22:48.444389', 'step': 5035, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:48.502439', 'step': 5035, 'epoch': 1} {'type': 'loss', 'content': 0.27436983585357666, 'timestamp': '2025-10-01 04:22:48.508395', 'step': 5036, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:48.560809', 'step': 5036, 'epoch': 1} {'type': 'loss', 'content': 0.1349763423204422, 'timestamp': '2025-10-01 04:22:48.563192', 'step': 5037, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:48.616556', 'step': 5037, 'epoch': 1} {'type': 'loss', 'content': 0.18899491429328918, 'timestamp': '2025-10-01 04:22:48.618840', 'step': 5038, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:48.672363', 'step': 5038, 'epoch': 1} {'type': 'loss', 'content': 0.26673996448516846, 'timestamp': '2025-10-01 04:22:48.674253', 'step': 5039, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:48.727403', 'step': 5039, 'epoch': 1} {'type': 'loss', 'content': 0.19041186571121216, 'timestamp': '2025-10-01 04:22:48.732897', 'step': 5040, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:48.785273', 'step': 5040, 'epoch': 1} {'type': 'loss', 'content': 0.1728954017162323, 'timestamp': '2025-10-01 04:22:48.787412', 'step': 5041, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:48.840387', 'step': 5041, 'epoch': 1} {'type': 'loss', 'content': 0.15704114735126495, 'timestamp': '2025-10-01 04:22:48.842616', 'step': 5042, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:48.895618', 'step': 5042, 'epoch': 1} {'type': 'loss', 'content': 0.22244705259799957, 'timestamp': '2025-10-01 04:22:48.898700', 'step': 5043, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:48.951901', 'step': 5043, 'epoch': 1} {'type': 'loss', 'content': 0.20093347132205963, 'timestamp': '2025-10-01 04:22:48.958367', 'step': 5044, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:49.011010', 'step': 5044, 'epoch': 1} {'type': 'loss', 'content': 0.2501477301120758, 'timestamp': '2025-10-01 04:22:49.013229', 'step': 5045, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:22:49.066203', 'step': 5045, 'epoch': 1} {'type': 'loss', 'content': 0.12589547038078308, 'timestamp': '2025-10-01 04:22:49.068380', 'step': 5046, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:49.122139', 'step': 5046, 'epoch': 1} {'type': 'loss', 'content': 0.211660236120224, 'timestamp': '2025-10-01 04:22:49.123986', 'step': 5047, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:49.176751', 'step': 5047, 'epoch': 1} {'type': 'loss', 'content': 0.13864310085773468, 'timestamp': '2025-10-01 04:22:49.182315', 'step': 5048, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:22:49.240805', 'step': 5048, 'epoch': 1} {'type': 'loss', 'content': 0.1217820942401886, 'timestamp': '2025-10-01 04:22:49.243031', 'step': 5049, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:49.296490', 'step': 5049, 'epoch': 1} {'type': 'loss', 'content': 0.14619441330432892, 'timestamp': '2025-10-01 04:22:49.298796', 'step': 5050, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:49.351808', 'step': 5050, 'epoch': 1} {'type': 'loss', 'content': 0.21866631507873535, 'timestamp': '2025-10-01 04:22:49.354030', 'step': 5051, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:49.406666', 'step': 5051, 'epoch': 1} {'type': 'loss', 'content': 0.1919555962085724, 'timestamp': '2025-10-01 04:22:49.412445', 'step': 5052, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:49.464901', 'step': 5052, 'epoch': 1} {'type': 'loss', 'content': 0.15702804923057556, 'timestamp': '2025-10-01 04:22:49.467031', 'step': 5053, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:49.519726', 'step': 5053, 'epoch': 1} {'type': 'loss', 'content': 0.14681574702262878, 'timestamp': '2025-10-01 04:22:49.521634', 'step': 5054, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:49.574735', 'step': 5054, 'epoch': 1} {'type': 'loss', 'content': 0.1975889801979065, 'timestamp': '2025-10-01 04:22:49.576573', 'step': 5055, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:49.631705', 'step': 5055, 'epoch': 1} {'type': 'loss', 'content': 0.10002081096172333, 'timestamp': '2025-10-01 04:22:49.638826', 'step': 5056, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:22:49.690882', 'step': 5056, 'epoch': 1} {'type': 'loss', 'content': 0.3435324430465698, 'timestamp': '2025-10-01 04:22:49.693098', 'step': 5057, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:49.746997', 'step': 5057, 'epoch': 1} {'type': 'loss', 'content': 0.1386568248271942, 'timestamp': '2025-10-01 04:22:49.749362', 'step': 5058, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:22:49.802426', 'step': 5058, 'epoch': 1} {'type': 'loss', 'content': 0.15644621849060059, 'timestamp': '2025-10-01 04:22:49.804794', 'step': 5059, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:49.859561', 'step': 5059, 'epoch': 1} {'type': 'loss', 'content': 0.08236341178417206, 'timestamp': '2025-10-01 04:22:49.865556', 'step': 5060, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:49.919799', 'step': 5060, 'epoch': 1} {'type': 'loss', 'content': 0.09870739281177521, 'timestamp': '2025-10-01 04:22:49.921576', 'step': 5061, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:49.974814', 'step': 5061, 'epoch': 1} {'type': 'loss', 'content': 0.16004401445388794, 'timestamp': '2025-10-01 04:22:49.977209', 'step': 5062, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:50.038479', 'step': 5062, 'epoch': 1} {'type': 'loss', 'content': 0.14600008726119995, 'timestamp': '2025-10-01 04:22:50.040884', 'step': 5063, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:50.094559', 'step': 5063, 'epoch': 1} {'type': 'loss', 'content': 0.25557592511177063, 'timestamp': '2025-10-01 04:22:50.101137', 'step': 5064, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:50.154297', 'step': 5064, 'epoch': 1} {'type': 'loss', 'content': 0.15405164659023285, 'timestamp': '2025-10-01 04:22:50.167063', 'step': 5065, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:50.220973', 'step': 5065, 'epoch': 1} {'type': 'loss', 'content': 0.22058621048927307, 'timestamp': '2025-10-01 04:22:50.223483', 'step': 5066, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:50.297211', 'step': 5066, 'epoch': 1} {'type': 'loss', 'content': 0.22135601937770844, 'timestamp': '2025-10-01 04:22:50.299523', 'step': 5067, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:50.352214', 'step': 5067, 'epoch': 1} {'type': 'loss', 'content': 0.15093785524368286, 'timestamp': '2025-10-01 04:22:50.358049', 'step': 5068, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:22:50.410771', 'step': 5068, 'epoch': 1} {'type': 'loss', 'content': 0.18400989472866058, 'timestamp': '2025-10-01 04:22:50.413167', 'step': 5069, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:50.465881', 'step': 5069, 'epoch': 1} {'type': 'loss', 'content': 0.23514799773693085, 'timestamp': '2025-10-01 04:22:50.468321', 'step': 5070, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:50.521355', 'step': 5070, 'epoch': 1} {'type': 'loss', 'content': 0.14071983098983765, 'timestamp': '2025-10-01 04:22:50.523412', 'step': 5071, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:50.576400', 'step': 5071, 'epoch': 1} {'type': 'loss', 'content': 0.06572972983121872, 'timestamp': '2025-10-01 04:22:50.582354', 'step': 5072, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:50.634403', 'step': 5072, 'epoch': 1} {'type': 'loss', 'content': 0.14496086537837982, 'timestamp': '2025-10-01 04:22:50.636895', 'step': 5073, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:50.691337', 'step': 5073, 'epoch': 1} {'type': 'loss', 'content': 0.189748615026474, 'timestamp': '2025-10-01 04:22:50.693779', 'step': 5074, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:50.746712', 'step': 5074, 'epoch': 1} {'type': 'loss', 'content': 0.16556455194950104, 'timestamp': '2025-10-01 04:22:50.749143', 'step': 5075, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:50.802133', 'step': 5075, 'epoch': 1} {'type': 'loss', 'content': 0.16852658987045288, 'timestamp': '2025-10-01 04:22:50.807975', 'step': 5076, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:50.862664', 'step': 5076, 'epoch': 1} {'type': 'loss', 'content': 0.13378192484378815, 'timestamp': '2025-10-01 04:22:50.864884', 'step': 5077, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:50.918226', 'step': 5077, 'epoch': 1} {'type': 'loss', 'content': 0.14082196354866028, 'timestamp': '2025-10-01 04:22:50.920217', 'step': 5078, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:50.978104', 'step': 5078, 'epoch': 1} {'type': 'loss', 'content': 0.19057022035121918, 'timestamp': '2025-10-01 04:22:50.980454', 'step': 5079, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:51.043090', 'step': 5079, 'epoch': 1} {'type': 'loss', 'content': 0.12417002767324448, 'timestamp': '2025-10-01 04:22:51.048822', 'step': 5080, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:51.102096', 'step': 5080, 'epoch': 1} {'type': 'loss', 'content': 0.18316860496997833, 'timestamp': '2025-10-01 04:22:51.104262', 'step': 5081, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:51.166287', 'step': 5081, 'epoch': 1} {'type': 'loss', 'content': 0.23091183602809906, 'timestamp': '2025-10-01 04:22:51.168654', 'step': 5082, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:51.223391', 'step': 5082, 'epoch': 1} {'type': 'loss', 'content': 0.13681596517562866, 'timestamp': '2025-10-01 04:22:51.225696', 'step': 5083, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:51.280251', 'step': 5083, 'epoch': 1} {'type': 'loss', 'content': 0.1133418083190918, 'timestamp': '2025-10-01 04:22:51.286727', 'step': 5084, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:51.349414', 'step': 5084, 'epoch': 1} {'type': 'loss', 'content': 0.15128375589847565, 'timestamp': '2025-10-01 04:22:51.351593', 'step': 5085, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:51.404473', 'step': 5085, 'epoch': 1} {'type': 'loss', 'content': 0.10982860624790192, 'timestamp': '2025-10-01 04:22:51.409936', 'step': 5086, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:51.465219', 'step': 5086, 'epoch': 1} {'type': 'loss', 'content': 0.22178134322166443, 'timestamp': '2025-10-01 04:22:51.467902', 'step': 5087, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:51.526546', 'step': 5087, 'epoch': 1} {'type': 'loss', 'content': 0.13056643307209015, 'timestamp': '2025-10-01 04:22:51.532533', 'step': 5088, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:51.584603', 'step': 5088, 'epoch': 1} {'type': 'loss', 'content': 0.10305552184581757, 'timestamp': '2025-10-01 04:22:51.587003', 'step': 5089, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:51.640082', 'step': 5089, 'epoch': 1} {'type': 'loss', 'content': 0.20662853121757507, 'timestamp': '2025-10-01 04:22:51.642210', 'step': 5090, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:51.699395', 'step': 5090, 'epoch': 1} {'type': 'loss', 'content': 0.2494836300611496, 'timestamp': '2025-10-01 04:22:51.701444', 'step': 5091, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:51.753962', 'step': 5091, 'epoch': 1} {'type': 'loss', 'content': 0.2806115746498108, 'timestamp': '2025-10-01 04:22:51.759715', 'step': 5092, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:51.815397', 'step': 5092, 'epoch': 1} {'type': 'loss', 'content': 0.09156164526939392, 'timestamp': '2025-10-01 04:22:51.818178', 'step': 5093, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:51.873632', 'step': 5093, 'epoch': 1} {'type': 'loss', 'content': 0.13860748708248138, 'timestamp': '2025-10-01 04:22:51.876016', 'step': 5094, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:51.931529', 'step': 5094, 'epoch': 1} {'type': 'loss', 'content': 0.19471469521522522, 'timestamp': '2025-10-01 04:22:51.942560', 'step': 5095, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:51.997529', 'step': 5095, 'epoch': 1} {'type': 'loss', 'content': 0.22107796370983124, 'timestamp': '2025-10-01 04:22:52.003373', 'step': 5096, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:52.058270', 'step': 5096, 'epoch': 1} {'type': 'loss', 'content': 0.1224713921546936, 'timestamp': '2025-10-01 04:22:52.060474', 'step': 5097, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:52.113223', 'step': 5097, 'epoch': 1} {'type': 'loss', 'content': 0.12107513099908829, 'timestamp': '2025-10-01 04:22:52.115607', 'step': 5098, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:52.168615', 'step': 5098, 'epoch': 1} {'type': 'loss', 'content': 0.1633727252483368, 'timestamp': '2025-10-01 04:22:52.170950', 'step': 5099, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:52.223483', 'step': 5099, 'epoch': 1} {'type': 'loss', 'content': 0.18354111909866333, 'timestamp': '2025-10-01 04:22:52.229328', 'step': 5100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:52.281347', 'step': 5100, 'epoch': 1} {'type': 'loss', 'content': 0.15639247000217438, 'timestamp': '2025-10-01 04:22:52.283522', 'step': 5101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:52.349295', 'step': 5101, 'epoch': 1} {'type': 'loss', 'content': 0.19517488777637482, 'timestamp': '2025-10-01 04:22:52.351659', 'step': 5102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:52.404464', 'step': 5102, 'epoch': 1} {'type': 'loss', 'content': 0.24658334255218506, 'timestamp': '2025-10-01 04:22:52.406887', 'step': 5103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:52.469768', 'step': 5103, 'epoch': 1} {'type': 'loss', 'content': 0.23896168172359467, 'timestamp': '2025-10-01 04:22:52.475583', 'step': 5104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:52.528301', 'step': 5104, 'epoch': 1} {'type': 'loss', 'content': 0.1698649823665619, 'timestamp': '2025-10-01 04:22:52.530670', 'step': 5105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:52.583597', 'step': 5105, 'epoch': 1} {'type': 'loss', 'content': 0.14303624629974365, 'timestamp': '2025-10-01 04:22:52.585927', 'step': 5106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:52.639667', 'step': 5106, 'epoch': 1} {'type': 'loss', 'content': 0.1574983447790146, 'timestamp': '2025-10-01 04:22:52.641903', 'step': 5107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:52.694550', 'step': 5107, 'epoch': 1} {'type': 'loss', 'content': 0.1630067080259323, 'timestamp': '2025-10-01 04:22:52.700437', 'step': 5108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:52.758617', 'step': 5108, 'epoch': 1} {'type': 'loss', 'content': 0.24623139202594757, 'timestamp': '2025-10-01 04:22:52.760804', 'step': 5109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:52.813426', 'step': 5109, 'epoch': 1} {'type': 'loss', 'content': 0.25912728905677795, 'timestamp': '2025-10-01 04:22:52.815643', 'step': 5110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:52.868521', 'step': 5110, 'epoch': 1} {'type': 'loss', 'content': 0.1863475739955902, 'timestamp': '2025-10-01 04:22:52.870799', 'step': 5111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:52.924088', 'step': 5111, 'epoch': 1} {'type': 'loss', 'content': 0.13048914074897766, 'timestamp': '2025-10-01 04:22:52.929910', 'step': 5112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:52.983292', 'step': 5112, 'epoch': 1} {'type': 'loss', 'content': 0.173696830868721, 'timestamp': '2025-10-01 04:22:52.985357', 'step': 5113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:22:53.047951', 'step': 5113, 'epoch': 1} {'type': 'loss', 'content': 0.2595328986644745, 'timestamp': '2025-10-01 04:22:53.050812', 'step': 5114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:22:53.115711', 'step': 5114, 'epoch': 1} {'type': 'loss', 'content': 0.12274083495140076, 'timestamp': '2025-10-01 04:22:53.118458', 'step': 5115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:53.171439', 'step': 5115, 'epoch': 1} {'type': 'loss', 'content': 0.1835012286901474, 'timestamp': '2025-10-01 04:22:53.177382', 'step': 5116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:22:53.231173', 'step': 5116, 'epoch': 1} {'type': 'loss', 'content': 0.12001600116491318, 'timestamp': '2025-10-01 04:22:53.234049', 'step': 5117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:53.288006', 'step': 5117, 'epoch': 1} {'type': 'loss', 'content': 0.2352369874715805, 'timestamp': '2025-10-01 04:22:53.290232', 'step': 5118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:53.344628', 'step': 5118, 'epoch': 1} {'type': 'loss', 'content': 0.21942263841629028, 'timestamp': '2025-10-01 04:22:53.346868', 'step': 5119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:53.399881', 'step': 5119, 'epoch': 1} {'type': 'loss', 'content': 0.20631363987922668, 'timestamp': '2025-10-01 04:22:53.406042', 'step': 5120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:53.458737', 'step': 5120, 'epoch': 1} {'type': 'loss', 'content': 0.15485233068466187, 'timestamp': '2025-10-01 04:22:53.460740', 'step': 5121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:53.513423', 'step': 5121, 'epoch': 1} {'type': 'loss', 'content': 0.21639621257781982, 'timestamp': '2025-10-01 04:22:53.515908', 'step': 5122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:22:53.570362', 'step': 5122, 'epoch': 1} {'type': 'loss', 'content': 0.13370472192764282, 'timestamp': '2025-10-01 04:22:53.573940', 'step': 5123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:22:53.628203', 'step': 5123, 'epoch': 1} {'type': 'loss', 'content': 0.11518601328134537, 'timestamp': '2025-10-01 04:22:53.634903', 'step': 5124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:22:53.688881', 'step': 5124, 'epoch': 1} {'type': 'loss', 'content': 0.16191859543323517, 'timestamp': '2025-10-01 04:22:53.691431', 'step': 5125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:22:53.745942', 'step': 5125, 'epoch': 1} {'type': 'loss', 'content': 0.17466813325881958, 'timestamp': '2025-10-01 04:22:53.748653', 'step': 5126, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-01 04:23:07.052406', 'step': 5126, 'epoch': 1} {'type': 'pplx', 'content': 12726.695761009034, 'timestamp': '2025-10-01 04:23:07.055300', 'step': 5126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:07.108986', 'step': 5126, 'epoch': 1} {'type': 'loss', 'content': 0.10837766528129578, 'timestamp': '2025-10-01 04:23:07.111504', 'step': 5127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:07.166011', 'step': 5127, 'epoch': 1} {'type': 'loss', 'content': 0.265697717666626, 'timestamp': '2025-10-01 04:23:07.171947', 'step': 5128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:07.224764', 'step': 5128, 'epoch': 1} {'type': 'loss', 'content': 0.12483793497085571, 'timestamp': '2025-10-01 04:23:07.227262', 'step': 5129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:07.285391', 'step': 5129, 'epoch': 1} {'type': 'loss', 'content': 0.1481773555278778, 'timestamp': '2025-10-01 04:23:07.289669', 'step': 5130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:07.359820', 'step': 5130, 'epoch': 1} {'type': 'loss', 'content': 0.14487041532993317, 'timestamp': '2025-10-01 04:23:07.368768', 'step': 5131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:07.421503', 'step': 5131, 'epoch': 1} {'type': 'loss', 'content': 0.12680953741073608, 'timestamp': '2025-10-01 04:23:07.427758', 'step': 5132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:07.480439', 'step': 5132, 'epoch': 1} {'type': 'loss', 'content': 0.1826128512620926, 'timestamp': '2025-10-01 04:23:07.495408', 'step': 5133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:07.557670', 'step': 5133, 'epoch': 1} {'type': 'loss', 'content': 0.15290921926498413, 'timestamp': '2025-10-01 04:23:07.559951', 'step': 5134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:07.623563', 'step': 5134, 'epoch': 1} {'type': 'loss', 'content': 0.18427041172981262, 'timestamp': '2025-10-01 04:23:07.625828', 'step': 5135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:07.679830', 'step': 5135, 'epoch': 1} {'type': 'loss', 'content': 0.1510196328163147, 'timestamp': '2025-10-01 04:23:07.696178', 'step': 5136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:07.748301', 'step': 5136, 'epoch': 1} {'type': 'loss', 'content': 0.12719208002090454, 'timestamp': '2025-10-01 04:23:07.750564', 'step': 5137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:07.804243', 'step': 5137, 'epoch': 1} {'type': 'loss', 'content': 0.242510586977005, 'timestamp': '2025-10-01 04:23:07.807041', 'step': 5138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:07.860341', 'step': 5138, 'epoch': 1} {'type': 'loss', 'content': 0.21688306331634521, 'timestamp': '2025-10-01 04:23:07.870045', 'step': 5139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:07.924660', 'step': 5139, 'epoch': 1} {'type': 'loss', 'content': 0.15949834883213043, 'timestamp': '2025-10-01 04:23:07.931478', 'step': 5140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:07.985767', 'step': 5140, 'epoch': 1} {'type': 'loss', 'content': 0.1704140454530716, 'timestamp': '2025-10-01 04:23:07.991836', 'step': 5141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:08.045342', 'step': 5141, 'epoch': 1} {'type': 'loss', 'content': 0.13916103541851044, 'timestamp': '2025-10-01 04:23:08.053358', 'step': 5142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:08.127943', 'step': 5142, 'epoch': 1} {'type': 'loss', 'content': 0.14161236584186554, 'timestamp': '2025-10-01 04:23:08.135682', 'step': 5143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:08.189134', 'step': 5143, 'epoch': 1} {'type': 'loss', 'content': 0.1398049294948578, 'timestamp': '2025-10-01 04:23:08.195027', 'step': 5144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:08.247148', 'step': 5144, 'epoch': 1} {'type': 'loss', 'content': 0.12805885076522827, 'timestamp': '2025-10-01 04:23:08.249531', 'step': 5145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:08.302582', 'step': 5145, 'epoch': 1} {'type': 'loss', 'content': 0.12618018686771393, 'timestamp': '2025-10-01 04:23:08.305051', 'step': 5146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:08.360883', 'step': 5146, 'epoch': 1} {'type': 'loss', 'content': 0.1330655962228775, 'timestamp': '2025-10-01 04:23:08.363639', 'step': 5147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:08.418429', 'step': 5147, 'epoch': 1} {'type': 'loss', 'content': 0.20525462925434113, 'timestamp': '2025-10-01 04:23:08.424387', 'step': 5148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:08.476987', 'step': 5148, 'epoch': 1} {'type': 'loss', 'content': 0.05116216838359833, 'timestamp': '2025-10-01 04:23:08.479116', 'step': 5149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:08.532213', 'step': 5149, 'epoch': 1} {'type': 'loss', 'content': 0.24294643104076385, 'timestamp': '2025-10-01 04:23:08.534441', 'step': 5150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:23:08.591057', 'step': 5150, 'epoch': 1} {'type': 'loss', 'content': 0.07958890497684479, 'timestamp': '2025-10-01 04:23:08.593313', 'step': 5151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:08.646136', 'step': 5151, 'epoch': 1} {'type': 'loss', 'content': 0.14699071645736694, 'timestamp': '2025-10-01 04:23:08.652084', 'step': 5152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:08.704736', 'step': 5152, 'epoch': 1} {'type': 'loss', 'content': 0.1777820736169815, 'timestamp': '2025-10-01 04:23:08.707096', 'step': 5153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:08.760355', 'step': 5153, 'epoch': 1} {'type': 'loss', 'content': 0.16086632013320923, 'timestamp': '2025-10-01 04:23:08.763786', 'step': 5154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:23:08.818400', 'step': 5154, 'epoch': 1} {'type': 'loss', 'content': 0.17897751927375793, 'timestamp': '2025-10-01 04:23:08.820839', 'step': 5155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:08.874305', 'step': 5155, 'epoch': 1} {'type': 'loss', 'content': 0.11637986451387405, 'timestamp': '2025-10-01 04:23:08.880277', 'step': 5156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:08.932249', 'step': 5156, 'epoch': 1} {'type': 'loss', 'content': 0.15403805673122406, 'timestamp': '2025-10-01 04:23:08.934094', 'step': 5157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:08.987071', 'step': 5157, 'epoch': 1} {'type': 'loss', 'content': 0.1664489209651947, 'timestamp': '2025-10-01 04:23:08.989293', 'step': 5158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:09.041778', 'step': 5158, 'epoch': 1} {'type': 'loss', 'content': 0.15658646821975708, 'timestamp': '2025-10-01 04:23:09.048333', 'step': 5159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:09.104160', 'step': 5159, 'epoch': 1} {'type': 'loss', 'content': 0.17209060490131378, 'timestamp': '2025-10-01 04:23:09.110036', 'step': 5160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:09.161798', 'step': 5160, 'epoch': 1} {'type': 'loss', 'content': 0.15081579983234406, 'timestamp': '2025-10-01 04:23:09.167693', 'step': 5161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:09.222232', 'step': 5161, 'epoch': 1} {'type': 'loss', 'content': 0.1874174326658249, 'timestamp': '2025-10-01 04:23:09.224974', 'step': 5162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:09.281736', 'step': 5162, 'epoch': 1} {'type': 'loss', 'content': 0.20726656913757324, 'timestamp': '2025-10-01 04:23:09.283825', 'step': 5163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:09.336303', 'step': 5163, 'epoch': 1} {'type': 'loss', 'content': 0.2077956348657608, 'timestamp': '2025-10-01 04:23:09.342091', 'step': 5164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:23:09.394423', 'step': 5164, 'epoch': 1} {'type': 'loss', 'content': 0.20168083906173706, 'timestamp': '2025-10-01 04:23:09.396636', 'step': 5165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:09.449628', 'step': 5165, 'epoch': 1} {'type': 'loss', 'content': 0.10570524632930756, 'timestamp': '2025-10-01 04:23:09.451797', 'step': 5166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:09.505016', 'step': 5166, 'epoch': 1} {'type': 'loss', 'content': 0.19614972174167633, 'timestamp': '2025-10-01 04:23:09.507309', 'step': 5167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:09.567327', 'step': 5167, 'epoch': 1} {'type': 'loss', 'content': 0.09600719809532166, 'timestamp': '2025-10-01 04:23:09.573838', 'step': 5168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:23:09.631560', 'step': 5168, 'epoch': 1} {'type': 'loss', 'content': 0.237360879778862, 'timestamp': '2025-10-01 04:23:09.633951', 'step': 5169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:09.687151', 'step': 5169, 'epoch': 1} {'type': 'loss', 'content': 0.19351984560489655, 'timestamp': '2025-10-01 04:23:09.690220', 'step': 5170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:09.743396', 'step': 5170, 'epoch': 1} {'type': 'loss', 'content': 0.21844696998596191, 'timestamp': '2025-10-01 04:23:09.745710', 'step': 5171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:09.802167', 'step': 5171, 'epoch': 1} {'type': 'loss', 'content': 0.19416533410549164, 'timestamp': '2025-10-01 04:23:09.807976', 'step': 5172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:09.860575', 'step': 5172, 'epoch': 1} {'type': 'loss', 'content': 0.1309957355260849, 'timestamp': '2025-10-01 04:23:09.870779', 'step': 5173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:09.928687', 'step': 5173, 'epoch': 1} {'type': 'loss', 'content': 0.15988600254058838, 'timestamp': '2025-10-01 04:23:09.937352', 'step': 5174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:23:09.992937', 'step': 5174, 'epoch': 1} {'type': 'loss', 'content': 0.17918752133846283, 'timestamp': '2025-10-01 04:23:09.995155', 'step': 5175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:10.050805', 'step': 5175, 'epoch': 1} {'type': 'loss', 'content': 0.1479867547750473, 'timestamp': '2025-10-01 04:23:10.056722', 'step': 5176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:23:10.109638', 'step': 5176, 'epoch': 1} {'type': 'loss', 'content': 0.15478552877902985, 'timestamp': '2025-10-01 04:23:10.111838', 'step': 5177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:23:10.165543', 'step': 5177, 'epoch': 1} {'type': 'loss', 'content': 0.14624781906604767, 'timestamp': '2025-10-01 04:23:10.167881', 'step': 5178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:10.228376', 'step': 5178, 'epoch': 1} {'type': 'loss', 'content': 0.15947911143302917, 'timestamp': '2025-10-01 04:23:10.231355', 'step': 5179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:10.285217', 'step': 5179, 'epoch': 1} {'type': 'loss', 'content': 0.18831993639469147, 'timestamp': '2025-10-01 04:23:10.291388', 'step': 5180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:10.351612', 'step': 5180, 'epoch': 1} {'type': 'loss', 'content': 0.14518874883651733, 'timestamp': '2025-10-01 04:23:10.353835', 'step': 5181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:10.406861', 'step': 5181, 'epoch': 1} {'type': 'loss', 'content': 0.1775764375925064, 'timestamp': '2025-10-01 04:23:10.409089', 'step': 5182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:10.462204', 'step': 5182, 'epoch': 1} {'type': 'loss', 'content': 0.15119118988513947, 'timestamp': '2025-10-01 04:23:10.473654', 'step': 5183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:10.526655', 'step': 5183, 'epoch': 1} {'type': 'loss', 'content': 0.19499677419662476, 'timestamp': '2025-10-01 04:23:10.532675', 'step': 5184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:10.586093', 'step': 5184, 'epoch': 1} {'type': 'loss', 'content': 0.21076835691928864, 'timestamp': '2025-10-01 04:23:10.588231', 'step': 5185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:10.641237', 'step': 5185, 'epoch': 1} {'type': 'loss', 'content': 0.17590972781181335, 'timestamp': '2025-10-01 04:23:10.643398', 'step': 5186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:10.695999', 'step': 5186, 'epoch': 1} {'type': 'loss', 'content': 0.1542842984199524, 'timestamp': '2025-10-01 04:23:10.698669', 'step': 5187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:10.755045', 'step': 5187, 'epoch': 1} {'type': 'loss', 'content': 0.24281489849090576, 'timestamp': '2025-10-01 04:23:10.760890', 'step': 5188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:10.813039', 'step': 5188, 'epoch': 1} {'type': 'loss', 'content': 0.19564539194107056, 'timestamp': '2025-10-01 04:23:10.815277', 'step': 5189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:10.868463', 'step': 5189, 'epoch': 1} {'type': 'loss', 'content': 0.17135587334632874, 'timestamp': '2025-10-01 04:23:10.870670', 'step': 5190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:10.926103', 'step': 5190, 'epoch': 1} {'type': 'loss', 'content': 0.13307183980941772, 'timestamp': '2025-10-01 04:23:10.928393', 'step': 5191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:10.981003', 'step': 5191, 'epoch': 1} {'type': 'loss', 'content': 0.18809208273887634, 'timestamp': '2025-10-01 04:23:10.986858', 'step': 5192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:23:11.041650', 'step': 5192, 'epoch': 1} {'type': 'loss', 'content': 0.15247221291065216, 'timestamp': '2025-10-01 04:23:11.043816', 'step': 5193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:11.097588', 'step': 5193, 'epoch': 1} {'type': 'loss', 'content': 0.195084348320961, 'timestamp': '2025-10-01 04:23:11.100113', 'step': 5194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:11.152953', 'step': 5194, 'epoch': 1} {'type': 'loss', 'content': 0.12326338142156601, 'timestamp': '2025-10-01 04:23:11.156685', 'step': 5195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:11.211198', 'step': 5195, 'epoch': 1} {'type': 'loss', 'content': 0.12467878311872482, 'timestamp': '2025-10-01 04:23:11.217258', 'step': 5196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:11.270124', 'step': 5196, 'epoch': 1} {'type': 'loss', 'content': 0.26389482617378235, 'timestamp': '2025-10-01 04:23:11.272501', 'step': 5197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:11.325888', 'step': 5197, 'epoch': 1} {'type': 'loss', 'content': 0.22589410841464996, 'timestamp': '2025-10-01 04:23:11.328176', 'step': 5198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:11.381918', 'step': 5198, 'epoch': 1} {'type': 'loss', 'content': 0.15972398221492767, 'timestamp': '2025-10-01 04:23:11.384588', 'step': 5199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:11.437904', 'step': 5199, 'epoch': 1} {'type': 'loss', 'content': 0.19878165423870087, 'timestamp': '2025-10-01 04:23:11.445244', 'step': 5200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:11.500577', 'step': 5200, 'epoch': 1} {'type': 'loss', 'content': 0.2507258355617523, 'timestamp': '2025-10-01 04:23:11.502843', 'step': 5201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:11.555831', 'step': 5201, 'epoch': 1} {'type': 'loss', 'content': 0.21166476607322693, 'timestamp': '2025-10-01 04:23:11.558268', 'step': 5202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:23:11.612464', 'step': 5202, 'epoch': 1} {'type': 'loss', 'content': 0.12650573253631592, 'timestamp': '2025-10-01 04:23:11.614887', 'step': 5203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:11.672190', 'step': 5203, 'epoch': 1} {'type': 'loss', 'content': 0.11377651244401932, 'timestamp': '2025-10-01 04:23:11.677669', 'step': 5204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:11.732230', 'step': 5204, 'epoch': 1} {'type': 'loss', 'content': 0.11904793232679367, 'timestamp': '2025-10-01 04:23:11.734823', 'step': 5205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:11.787661', 'step': 5205, 'epoch': 1} {'type': 'loss', 'content': 0.15061138570308685, 'timestamp': '2025-10-01 04:23:11.789841', 'step': 5206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:11.842613', 'step': 5206, 'epoch': 1} {'type': 'loss', 'content': 0.14759069681167603, 'timestamp': '2025-10-01 04:23:11.844591', 'step': 5207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:11.897484', 'step': 5207, 'epoch': 1} {'type': 'loss', 'content': 0.20091038942337036, 'timestamp': '2025-10-01 04:23:11.903289', 'step': 5208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:11.960813', 'step': 5208, 'epoch': 1} {'type': 'loss', 'content': 0.2237611562013626, 'timestamp': '2025-10-01 04:23:11.963127', 'step': 5209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:12.023914', 'step': 5209, 'epoch': 1} {'type': 'loss', 'content': 0.18024003505706787, 'timestamp': '2025-10-01 04:23:12.026866', 'step': 5210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:12.079562', 'step': 5210, 'epoch': 1} {'type': 'loss', 'content': 0.13683593273162842, 'timestamp': '2025-10-01 04:23:12.081841', 'step': 5211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:12.135932', 'step': 5211, 'epoch': 1} {'type': 'loss', 'content': 0.18913699686527252, 'timestamp': '2025-10-01 04:23:12.141831', 'step': 5212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:12.194319', 'step': 5212, 'epoch': 1} {'type': 'loss', 'content': 0.15499189496040344, 'timestamp': '2025-10-01 04:23:12.196771', 'step': 5213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:12.250212', 'step': 5213, 'epoch': 1} {'type': 'loss', 'content': 0.21027728915214539, 'timestamp': '2025-10-01 04:23:12.252579', 'step': 5214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:12.307634', 'step': 5214, 'epoch': 1} {'type': 'loss', 'content': 0.14490310847759247, 'timestamp': '2025-10-01 04:23:12.309809', 'step': 5215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:12.363329', 'step': 5215, 'epoch': 1} {'type': 'loss', 'content': 0.1632470190525055, 'timestamp': '2025-10-01 04:23:12.369265', 'step': 5216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:12.424854', 'step': 5216, 'epoch': 1} {'type': 'loss', 'content': 0.1704387068748474, 'timestamp': '2025-10-01 04:23:12.427551', 'step': 5217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:12.491188', 'step': 5217, 'epoch': 1} {'type': 'loss', 'content': 0.17597264051437378, 'timestamp': '2025-10-01 04:23:12.493438', 'step': 5218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:12.548603', 'step': 5218, 'epoch': 1} {'type': 'loss', 'content': 0.13278931379318237, 'timestamp': '2025-10-01 04:23:12.551372', 'step': 5219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:12.605236', 'step': 5219, 'epoch': 1} {'type': 'loss', 'content': 0.1835825890302658, 'timestamp': '2025-10-01 04:23:12.611125', 'step': 5220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:12.665262', 'step': 5220, 'epoch': 1} {'type': 'loss', 'content': 0.22880351543426514, 'timestamp': '2025-10-01 04:23:12.668060', 'step': 5221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:12.725986', 'step': 5221, 'epoch': 1} {'type': 'loss', 'content': 0.25933757424354553, 'timestamp': '2025-10-01 04:23:12.728200', 'step': 5222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:12.781399', 'step': 5222, 'epoch': 1} {'type': 'loss', 'content': 0.08468630909919739, 'timestamp': '2025-10-01 04:23:12.783662', 'step': 5223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:12.836482', 'step': 5223, 'epoch': 1} {'type': 'loss', 'content': 0.1645437479019165, 'timestamp': '2025-10-01 04:23:12.842618', 'step': 5224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:12.895472', 'step': 5224, 'epoch': 1} {'type': 'loss', 'content': 0.11883515119552612, 'timestamp': '2025-10-01 04:23:12.898246', 'step': 5225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:12.951316', 'step': 5225, 'epoch': 1} {'type': 'loss', 'content': 0.112835593521595, 'timestamp': '2025-10-01 04:23:12.953460', 'step': 5226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:13.007405', 'step': 5226, 'epoch': 1} {'type': 'loss', 'content': 0.18597781658172607, 'timestamp': '2025-10-01 04:23:13.009905', 'step': 5227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:13.071853', 'step': 5227, 'epoch': 1} {'type': 'loss', 'content': 0.10272242873907089, 'timestamp': '2025-10-01 04:23:13.077789', 'step': 5228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:13.133554', 'step': 5228, 'epoch': 1} {'type': 'loss', 'content': 0.19980742037296295, 'timestamp': '2025-10-01 04:23:13.135895', 'step': 5229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:13.189581', 'step': 5229, 'epoch': 1} {'type': 'loss', 'content': 0.1854073405265808, 'timestamp': '2025-10-01 04:23:13.191741', 'step': 5230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:13.244777', 'step': 5230, 'epoch': 1} {'type': 'loss', 'content': 0.09365569800138474, 'timestamp': '2025-10-01 04:23:13.248266', 'step': 5231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:23:13.301027', 'step': 5231, 'epoch': 1} {'type': 'loss', 'content': 0.21439781785011292, 'timestamp': '2025-10-01 04:23:13.308173', 'step': 5232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:13.375240', 'step': 5232, 'epoch': 1} {'type': 'loss', 'content': 0.15274949371814728, 'timestamp': '2025-10-01 04:23:13.377754', 'step': 5233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:13.429981', 'step': 5233, 'epoch': 1} {'type': 'loss', 'content': 0.2754689157009125, 'timestamp': '2025-10-01 04:23:13.432271', 'step': 5234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:13.485207', 'step': 5234, 'epoch': 1} {'type': 'loss', 'content': 0.06138026341795921, 'timestamp': '2025-10-01 04:23:13.487554', 'step': 5235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:13.540628', 'step': 5235, 'epoch': 1} {'type': 'loss', 'content': 0.1333186775445938, 'timestamp': '2025-10-01 04:23:13.546247', 'step': 5236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:13.599749', 'step': 5236, 'epoch': 1} {'type': 'loss', 'content': 0.11256159096956253, 'timestamp': '2025-10-01 04:23:13.601893', 'step': 5237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:23:13.654349', 'step': 5237, 'epoch': 1} {'type': 'loss', 'content': 0.23098038136959076, 'timestamp': '2025-10-01 04:23:13.656796', 'step': 5238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:13.709952', 'step': 5238, 'epoch': 1} {'type': 'loss', 'content': 0.1222216784954071, 'timestamp': '2025-10-01 04:23:13.711831', 'step': 5239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:13.769147', 'step': 5239, 'epoch': 1} {'type': 'loss', 'content': 0.22194409370422363, 'timestamp': '2025-10-01 04:23:13.774864', 'step': 5240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:13.836020', 'step': 5240, 'epoch': 1} {'type': 'loss', 'content': 0.15372727811336517, 'timestamp': '2025-10-01 04:23:13.838873', 'step': 5241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:13.910794', 'step': 5241, 'epoch': 1} {'type': 'loss', 'content': 0.15225136280059814, 'timestamp': '2025-10-01 04:23:13.913072', 'step': 5242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:13.967446', 'step': 5242, 'epoch': 1} {'type': 'loss', 'content': 0.14617015421390533, 'timestamp': '2025-10-01 04:23:13.969778', 'step': 5243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:23:14.022587', 'step': 5243, 'epoch': 1} {'type': 'loss', 'content': 0.15252640843391418, 'timestamp': '2025-10-01 04:23:14.028379', 'step': 5244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:14.081228', 'step': 5244, 'epoch': 1} {'type': 'loss', 'content': 0.0800866186618805, 'timestamp': '2025-10-01 04:23:14.083483', 'step': 5245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:14.136644', 'step': 5245, 'epoch': 1} {'type': 'loss', 'content': 0.14206458628177643, 'timestamp': '2025-10-01 04:23:14.139050', 'step': 5246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:14.192487', 'step': 5246, 'epoch': 1} {'type': 'loss', 'content': 0.17022526264190674, 'timestamp': '2025-10-01 04:23:14.194786', 'step': 5247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:14.256852', 'step': 5247, 'epoch': 1} {'type': 'loss', 'content': 0.128456711769104, 'timestamp': '2025-10-01 04:23:14.262842', 'step': 5248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:14.315758', 'step': 5248, 'epoch': 1} {'type': 'loss', 'content': 0.16797560453414917, 'timestamp': '2025-10-01 04:23:14.317955', 'step': 5249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:14.371057', 'step': 5249, 'epoch': 1} {'type': 'loss', 'content': 0.171975776553154, 'timestamp': '2025-10-01 04:23:14.373763', 'step': 5250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:14.427108', 'step': 5250, 'epoch': 1} {'type': 'loss', 'content': 0.10401798039674759, 'timestamp': '2025-10-01 04:23:14.429283', 'step': 5251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:14.482614', 'step': 5251, 'epoch': 1} {'type': 'loss', 'content': 0.15634065866470337, 'timestamp': '2025-10-01 04:23:14.489068', 'step': 5252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:14.541734', 'step': 5252, 'epoch': 1} {'type': 'loss', 'content': 0.24687449634075165, 'timestamp': '2025-10-01 04:23:14.543873', 'step': 5253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:14.597208', 'step': 5253, 'epoch': 1} {'type': 'loss', 'content': 0.18676204979419708, 'timestamp': '2025-10-01 04:23:14.600965', 'step': 5254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:14.658272', 'step': 5254, 'epoch': 1} {'type': 'loss', 'content': 0.16698291897773743, 'timestamp': '2025-10-01 04:23:14.660609', 'step': 5255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:14.723542', 'step': 5255, 'epoch': 1} {'type': 'loss', 'content': 0.16109627485275269, 'timestamp': '2025-10-01 04:23:14.729532', 'step': 5256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:23:14.788218', 'step': 5256, 'epoch': 1} {'type': 'loss', 'content': 0.2226632535457611, 'timestamp': '2025-10-01 04:23:14.790296', 'step': 5257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:14.843688', 'step': 5257, 'epoch': 1} {'type': 'loss', 'content': 0.1101495772600174, 'timestamp': '2025-10-01 04:23:14.846183', 'step': 5258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:14.901658', 'step': 5258, 'epoch': 1} {'type': 'loss', 'content': 0.06093023344874382, 'timestamp': '2025-10-01 04:23:14.904067', 'step': 5259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:14.961088', 'step': 5259, 'epoch': 1} {'type': 'loss', 'content': 0.1272343397140503, 'timestamp': '2025-10-01 04:23:14.967260', 'step': 5260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:15.020235', 'step': 5260, 'epoch': 1} {'type': 'loss', 'content': 0.11321485787630081, 'timestamp': '2025-10-01 04:23:15.022760', 'step': 5261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:15.076460', 'step': 5261, 'epoch': 1} {'type': 'loss', 'content': 0.10759418457746506, 'timestamp': '2025-10-01 04:23:15.079310', 'step': 5262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:15.134026', 'step': 5262, 'epoch': 1} {'type': 'loss', 'content': 0.0750579684972763, 'timestamp': '2025-10-01 04:23:15.139098', 'step': 5263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:15.195519', 'step': 5263, 'epoch': 1} {'type': 'loss', 'content': 0.15809956192970276, 'timestamp': '2025-10-01 04:23:15.201238', 'step': 5264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:15.253783', 'step': 5264, 'epoch': 1} {'type': 'loss', 'content': 0.28074347972869873, 'timestamp': '2025-10-01 04:23:15.256473', 'step': 5265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:15.309328', 'step': 5265, 'epoch': 1} {'type': 'loss', 'content': 0.20256327092647552, 'timestamp': '2025-10-01 04:23:15.311521', 'step': 5266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:23:15.366646', 'step': 5266, 'epoch': 1} {'type': 'loss', 'content': 0.13833346962928772, 'timestamp': '2025-10-01 04:23:15.369608', 'step': 5267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:15.426384', 'step': 5267, 'epoch': 1} {'type': 'loss', 'content': 0.10214298963546753, 'timestamp': '2025-10-01 04:23:15.432421', 'step': 5268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:15.486155', 'step': 5268, 'epoch': 1} {'type': 'loss', 'content': 0.19001206755638123, 'timestamp': '2025-10-01 04:23:15.488295', 'step': 5269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:15.541689', 'step': 5269, 'epoch': 1} {'type': 'loss', 'content': 0.14066484570503235, 'timestamp': '2025-10-01 04:23:15.544406', 'step': 5270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:15.599617', 'step': 5270, 'epoch': 1} {'type': 'loss', 'content': 0.20632967352867126, 'timestamp': '2025-10-01 04:23:15.601905', 'step': 5271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:15.655950', 'step': 5271, 'epoch': 1} {'type': 'loss', 'content': 0.10378934442996979, 'timestamp': '2025-10-01 04:23:15.662137', 'step': 5272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:15.715545', 'step': 5272, 'epoch': 1} {'type': 'loss', 'content': 0.18615949153900146, 'timestamp': '2025-10-01 04:23:15.717872', 'step': 5273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:15.770744', 'step': 5273, 'epoch': 1} {'type': 'loss', 'content': 0.2659071385860443, 'timestamp': '2025-10-01 04:23:15.773523', 'step': 5274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:23:15.827116', 'step': 5274, 'epoch': 1} {'type': 'loss', 'content': 0.25262290239334106, 'timestamp': '2025-10-01 04:23:15.831801', 'step': 5275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:15.885686', 'step': 5275, 'epoch': 1} {'type': 'loss', 'content': 0.1563929170370102, 'timestamp': '2025-10-01 04:23:15.891412', 'step': 5276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:15.943748', 'step': 5276, 'epoch': 1} {'type': 'loss', 'content': 0.12235245853662491, 'timestamp': '2025-10-01 04:23:15.945772', 'step': 5277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:16.000409', 'step': 5277, 'epoch': 1} {'type': 'loss', 'content': 0.1510804295539856, 'timestamp': '2025-10-01 04:23:16.003923', 'step': 5278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:16.057504', 'step': 5278, 'epoch': 1} {'type': 'loss', 'content': 0.3296760618686676, 'timestamp': '2025-10-01 04:23:16.060492', 'step': 5279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:16.113468', 'step': 5279, 'epoch': 1} {'type': 'loss', 'content': 0.12397924065589905, 'timestamp': '2025-10-01 04:23:16.124288', 'step': 5280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:16.176777', 'step': 5280, 'epoch': 1} {'type': 'loss', 'content': 0.18371661007404327, 'timestamp': '2025-10-01 04:23:16.181025', 'step': 5281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:23:16.235464', 'step': 5281, 'epoch': 1} {'type': 'loss', 'content': 0.19387972354888916, 'timestamp': '2025-10-01 04:23:16.237758', 'step': 5282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:16.292222', 'step': 5282, 'epoch': 1} {'type': 'loss', 'content': 0.1297711580991745, 'timestamp': '2025-10-01 04:23:16.294367', 'step': 5283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:16.348566', 'step': 5283, 'epoch': 1} {'type': 'loss', 'content': 0.12290496379137039, 'timestamp': '2025-10-01 04:23:16.354659', 'step': 5284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:16.407796', 'step': 5284, 'epoch': 1} {'type': 'loss', 'content': 0.2028551995754242, 'timestamp': '2025-10-01 04:23:16.410541', 'step': 5285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:23:16.464707', 'step': 5285, 'epoch': 1} {'type': 'loss', 'content': 0.2536218464374542, 'timestamp': '2025-10-01 04:23:16.467928', 'step': 5286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:16.524863', 'step': 5286, 'epoch': 1} {'type': 'loss', 'content': 0.14852352440357208, 'timestamp': '2025-10-01 04:23:16.527089', 'step': 5287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:16.579891', 'step': 5287, 'epoch': 1} {'type': 'loss', 'content': 0.11291114240884781, 'timestamp': '2025-10-01 04:23:16.586770', 'step': 5288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:16.640677', 'step': 5288, 'epoch': 1} {'type': 'loss', 'content': 0.27758297324180603, 'timestamp': '2025-10-01 04:23:16.643280', 'step': 5289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:16.696679', 'step': 5289, 'epoch': 1} {'type': 'loss', 'content': 0.13331685960292816, 'timestamp': '2025-10-01 04:23:16.698630', 'step': 5290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:23:16.751810', 'step': 5290, 'epoch': 1} {'type': 'loss', 'content': 0.26862287521362305, 'timestamp': '2025-10-01 04:23:16.755098', 'step': 5291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:16.811250', 'step': 5291, 'epoch': 1} {'type': 'loss', 'content': 0.19202134013175964, 'timestamp': '2025-10-01 04:23:16.817047', 'step': 5292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:16.869914', 'step': 5292, 'epoch': 1} {'type': 'loss', 'content': 0.2371208816766739, 'timestamp': '2025-10-01 04:23:16.872654', 'step': 5293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:16.928169', 'step': 5293, 'epoch': 1} {'type': 'loss', 'content': 0.16095732152462006, 'timestamp': '2025-10-01 04:23:16.930545', 'step': 5294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:16.984136', 'step': 5294, 'epoch': 1} {'type': 'loss', 'content': 0.10967999696731567, 'timestamp': '2025-10-01 04:23:16.987339', 'step': 5295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:17.041003', 'step': 5295, 'epoch': 1} {'type': 'loss', 'content': 0.13443917036056519, 'timestamp': '2025-10-01 04:23:17.050129', 'step': 5296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:17.118144', 'step': 5296, 'epoch': 1} {'type': 'loss', 'content': 0.1780162900686264, 'timestamp': '2025-10-01 04:23:17.120244', 'step': 5297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:17.173370', 'step': 5297, 'epoch': 1} {'type': 'loss', 'content': 0.170126274228096, 'timestamp': '2025-10-01 04:23:17.175604', 'step': 5298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:17.229345', 'step': 5298, 'epoch': 1} {'type': 'loss', 'content': 0.17260333895683289, 'timestamp': '2025-10-01 04:23:17.231948', 'step': 5299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:17.285524', 'step': 5299, 'epoch': 1} {'type': 'loss', 'content': 0.15420080721378326, 'timestamp': '2025-10-01 04:23:17.291534', 'step': 5300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:17.344039', 'step': 5300, 'epoch': 1} {'type': 'loss', 'content': 0.12275374680757523, 'timestamp': '2025-10-01 04:23:17.346176', 'step': 5301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:17.399367', 'step': 5301, 'epoch': 1} {'type': 'loss', 'content': 0.193034365773201, 'timestamp': '2025-10-01 04:23:17.401503', 'step': 5302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:17.454403', 'step': 5302, 'epoch': 1} {'type': 'loss', 'content': 0.24721114337444305, 'timestamp': '2025-10-01 04:23:17.456617', 'step': 5303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:17.509171', 'step': 5303, 'epoch': 1} {'type': 'loss', 'content': 0.11075921356678009, 'timestamp': '2025-10-01 04:23:17.514858', 'step': 5304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:17.567618', 'step': 5304, 'epoch': 1} {'type': 'loss', 'content': 0.11442182958126068, 'timestamp': '2025-10-01 04:23:17.569780', 'step': 5305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:17.622631', 'step': 5305, 'epoch': 1} {'type': 'loss', 'content': 0.1994982808828354, 'timestamp': '2025-10-01 04:23:17.624827', 'step': 5306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:17.677807', 'step': 5306, 'epoch': 1} {'type': 'loss', 'content': 0.12863942980766296, 'timestamp': '2025-10-01 04:23:17.679902', 'step': 5307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:17.732600', 'step': 5307, 'epoch': 1} {'type': 'loss', 'content': 0.14856916666030884, 'timestamp': '2025-10-01 04:23:17.738820', 'step': 5308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:17.792013', 'step': 5308, 'epoch': 1} {'type': 'loss', 'content': 0.1646534651517868, 'timestamp': '2025-10-01 04:23:17.794199', 'step': 5309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:17.852154', 'step': 5309, 'epoch': 1} {'type': 'loss', 'content': 0.16222691535949707, 'timestamp': '2025-10-01 04:23:17.854401', 'step': 5310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:17.909284', 'step': 5310, 'epoch': 1} {'type': 'loss', 'content': 0.09356501698493958, 'timestamp': '2025-10-01 04:23:17.911578', 'step': 5311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:17.964255', 'step': 5311, 'epoch': 1} {'type': 'loss', 'content': 0.15627546608448029, 'timestamp': '2025-10-01 04:23:17.970228', 'step': 5312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:18.023123', 'step': 5312, 'epoch': 1} {'type': 'loss', 'content': 0.21316832304000854, 'timestamp': '2025-10-01 04:23:18.025521', 'step': 5313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:18.079217', 'step': 5313, 'epoch': 1} {'type': 'loss', 'content': 0.24636928737163544, 'timestamp': '2025-10-01 04:23:18.081652', 'step': 5314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:18.137327', 'step': 5314, 'epoch': 1} {'type': 'loss', 'content': 0.11210817843675613, 'timestamp': '2025-10-01 04:23:18.139526', 'step': 5315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:18.192473', 'step': 5315, 'epoch': 1} {'type': 'loss', 'content': 0.20013830065727234, 'timestamp': '2025-10-01 04:23:18.198211', 'step': 5316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:23:18.252769', 'step': 5316, 'epoch': 1} {'type': 'loss', 'content': 0.09318049252033234, 'timestamp': '2025-10-01 04:23:18.254996', 'step': 5317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:18.309364', 'step': 5317, 'epoch': 1} {'type': 'loss', 'content': 0.15134289860725403, 'timestamp': '2025-10-01 04:23:18.311670', 'step': 5318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:18.364935', 'step': 5318, 'epoch': 1} {'type': 'loss', 'content': 0.15237773954868317, 'timestamp': '2025-10-01 04:23:18.367202', 'step': 5319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:18.420280', 'step': 5319, 'epoch': 1} {'type': 'loss', 'content': 0.16400621831417084, 'timestamp': '2025-10-01 04:23:18.426023', 'step': 5320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:18.479229', 'step': 5320, 'epoch': 1} {'type': 'loss', 'content': 0.12806107103824615, 'timestamp': '2025-10-01 04:23:18.481341', 'step': 5321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:18.534557', 'step': 5321, 'epoch': 1} {'type': 'loss', 'content': 0.10353682190179825, 'timestamp': '2025-10-01 04:23:18.536773', 'step': 5322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:18.589827', 'step': 5322, 'epoch': 1} {'type': 'loss', 'content': 0.1919214129447937, 'timestamp': '2025-10-01 04:23:18.591953', 'step': 5323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:18.645174', 'step': 5323, 'epoch': 1} {'type': 'loss', 'content': 0.25318434834480286, 'timestamp': '2025-10-01 04:23:18.650839', 'step': 5324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:18.704236', 'step': 5324, 'epoch': 1} {'type': 'loss', 'content': 0.1747366189956665, 'timestamp': '2025-10-01 04:23:18.706463', 'step': 5325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:18.759522', 'step': 5325, 'epoch': 1} {'type': 'loss', 'content': 0.12747560441493988, 'timestamp': '2025-10-01 04:23:18.762227', 'step': 5326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:18.815976', 'step': 5326, 'epoch': 1} {'type': 'loss', 'content': 0.1342432200908661, 'timestamp': '2025-10-01 04:23:18.818893', 'step': 5327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:18.874073', 'step': 5327, 'epoch': 1} {'type': 'loss', 'content': 0.12559090554714203, 'timestamp': '2025-10-01 04:23:18.880139', 'step': 5328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:18.933578', 'step': 5328, 'epoch': 1} {'type': 'loss', 'content': 0.23059365153312683, 'timestamp': '2025-10-01 04:23:18.936503', 'step': 5329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:23:18.990794', 'step': 5329, 'epoch': 1} {'type': 'loss', 'content': 0.14918899536132812, 'timestamp': '2025-10-01 04:23:18.993286', 'step': 5330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:19.051993', 'step': 5330, 'epoch': 1} {'type': 'loss', 'content': 0.19019854068756104, 'timestamp': '2025-10-01 04:23:19.054379', 'step': 5331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:19.108689', 'step': 5331, 'epoch': 1} {'type': 'loss', 'content': 0.1791205108165741, 'timestamp': '2025-10-01 04:23:19.114839', 'step': 5332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:19.168006', 'step': 5332, 'epoch': 1} {'type': 'loss', 'content': 0.18688398599624634, 'timestamp': '2025-10-01 04:23:19.170647', 'step': 5333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:19.225203', 'step': 5333, 'epoch': 1} {'type': 'loss', 'content': 0.09764137864112854, 'timestamp': '2025-10-01 04:23:19.227376', 'step': 5334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:19.281887', 'step': 5334, 'epoch': 1} {'type': 'loss', 'content': 0.23426605761051178, 'timestamp': '2025-10-01 04:23:19.284499', 'step': 5335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:19.339030', 'step': 5335, 'epoch': 1} {'type': 'loss', 'content': 0.18231511116027832, 'timestamp': '2025-10-01 04:23:19.345127', 'step': 5336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:19.398736', 'step': 5336, 'epoch': 1} {'type': 'loss', 'content': 0.1853252798318863, 'timestamp': '2025-10-01 04:23:19.400834', 'step': 5337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:19.455049', 'step': 5337, 'epoch': 1} {'type': 'loss', 'content': 0.2180844396352768, 'timestamp': '2025-10-01 04:23:19.460063', 'step': 5338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:19.515799', 'step': 5338, 'epoch': 1} {'type': 'loss', 'content': 0.16290344297885895, 'timestamp': '2025-10-01 04:23:19.518336', 'step': 5339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:19.574503', 'step': 5339, 'epoch': 1} {'type': 'loss', 'content': 0.15555088222026825, 'timestamp': '2025-10-01 04:23:19.580841', 'step': 5340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:19.634528', 'step': 5340, 'epoch': 1} {'type': 'loss', 'content': 0.0795212835073471, 'timestamp': '2025-10-01 04:23:19.637264', 'step': 5341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:19.691723', 'step': 5341, 'epoch': 1} {'type': 'loss', 'content': 0.04987102001905441, 'timestamp': '2025-10-01 04:23:19.694686', 'step': 5342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:19.751395', 'step': 5342, 'epoch': 1} {'type': 'loss', 'content': 0.18749387562274933, 'timestamp': '2025-10-01 04:23:19.753933', 'step': 5343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:19.818005', 'step': 5343, 'epoch': 1} {'type': 'loss', 'content': 0.17336751520633698, 'timestamp': '2025-10-01 04:23:19.825838', 'step': 5344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:19.879879', 'step': 5344, 'epoch': 1} {'type': 'loss', 'content': 0.20776088535785675, 'timestamp': '2025-10-01 04:23:19.882711', 'step': 5345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:19.936192', 'step': 5345, 'epoch': 1} {'type': 'loss', 'content': 0.23477187752723694, 'timestamp': '2025-10-01 04:23:19.938978', 'step': 5346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:19.992867', 'step': 5346, 'epoch': 1} {'type': 'loss', 'content': 0.19738276302814484, 'timestamp': '2025-10-01 04:23:19.996452', 'step': 5347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:20.050611', 'step': 5347, 'epoch': 1} {'type': 'loss', 'content': 0.21017314493656158, 'timestamp': '2025-10-01 04:23:20.056858', 'step': 5348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:20.110546', 'step': 5348, 'epoch': 1} {'type': 'loss', 'content': 0.16910745203495026, 'timestamp': '2025-10-01 04:23:20.116030', 'step': 5349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:20.169710', 'step': 5349, 'epoch': 1} {'type': 'loss', 'content': 0.22601617872714996, 'timestamp': '2025-10-01 04:23:20.172436', 'step': 5350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:20.226908', 'step': 5350, 'epoch': 1} {'type': 'loss', 'content': 0.20769654214382172, 'timestamp': '2025-10-01 04:23:20.229191', 'step': 5351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:20.283651', 'step': 5351, 'epoch': 1} {'type': 'loss', 'content': 0.2036512792110443, 'timestamp': '2025-10-01 04:23:20.289992', 'step': 5352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:20.344003', 'step': 5352, 'epoch': 1} {'type': 'loss', 'content': 0.1075567975640297, 'timestamp': '2025-10-01 04:23:20.345984', 'step': 5353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:20.399542', 'step': 5353, 'epoch': 1} {'type': 'loss', 'content': 0.13384878635406494, 'timestamp': '2025-10-01 04:23:20.402218', 'step': 5354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:20.455108', 'step': 5354, 'epoch': 1} {'type': 'loss', 'content': 0.21219757199287415, 'timestamp': '2025-10-01 04:23:20.457190', 'step': 5355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:20.509941', 'step': 5355, 'epoch': 1} {'type': 'loss', 'content': 0.19931958615779877, 'timestamp': '2025-10-01 04:23:20.516238', 'step': 5356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:20.568755', 'step': 5356, 'epoch': 1} {'type': 'loss', 'content': 0.15585441887378693, 'timestamp': '2025-10-01 04:23:20.571306', 'step': 5357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:20.627697', 'step': 5357, 'epoch': 1} {'type': 'loss', 'content': 0.20649820566177368, 'timestamp': '2025-10-01 04:23:20.631607', 'step': 5358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:20.687212', 'step': 5358, 'epoch': 1} {'type': 'loss', 'content': 0.11547981202602386, 'timestamp': '2025-10-01 04:23:20.689841', 'step': 5359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:20.743141', 'step': 5359, 'epoch': 1} {'type': 'loss', 'content': 0.16393227875232697, 'timestamp': '2025-10-01 04:23:20.748967', 'step': 5360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:20.803850', 'step': 5360, 'epoch': 1} {'type': 'loss', 'content': 0.11926853656768799, 'timestamp': '2025-10-01 04:23:20.805948', 'step': 5361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:20.859272', 'step': 5361, 'epoch': 1} {'type': 'loss', 'content': 0.13796859979629517, 'timestamp': '2025-10-01 04:23:20.865383', 'step': 5362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:20.920401', 'step': 5362, 'epoch': 1} {'type': 'loss', 'content': 0.15238933265209198, 'timestamp': '2025-10-01 04:23:20.922672', 'step': 5363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:20.975748', 'step': 5363, 'epoch': 1} {'type': 'loss', 'content': 0.12233759462833405, 'timestamp': '2025-10-01 04:23:20.984593', 'step': 5364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:21.040570', 'step': 5364, 'epoch': 1} {'type': 'loss', 'content': 0.1261747032403946, 'timestamp': '2025-10-01 04:23:21.042949', 'step': 5365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:21.098078', 'step': 5365, 'epoch': 1} {'type': 'loss', 'content': 0.14745748043060303, 'timestamp': '2025-10-01 04:23:21.100407', 'step': 5366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:21.153914', 'step': 5366, 'epoch': 1} {'type': 'loss', 'content': 0.11683550477027893, 'timestamp': '2025-10-01 04:23:21.156771', 'step': 5367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:21.210030', 'step': 5367, 'epoch': 1} {'type': 'loss', 'content': 0.13118523359298706, 'timestamp': '2025-10-01 04:23:21.215885', 'step': 5368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:21.269606', 'step': 5368, 'epoch': 1} {'type': 'loss', 'content': 0.19296739995479584, 'timestamp': '2025-10-01 04:23:21.272663', 'step': 5369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:21.327182', 'step': 5369, 'epoch': 1} {'type': 'loss', 'content': 0.12211106717586517, 'timestamp': '2025-10-01 04:23:21.329471', 'step': 5370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:23:21.383079', 'step': 5370, 'epoch': 1} {'type': 'loss', 'content': 0.148202046751976, 'timestamp': '2025-10-01 04:23:21.385579', 'step': 5371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:21.442548', 'step': 5371, 'epoch': 1} {'type': 'loss', 'content': 0.18542179465293884, 'timestamp': '2025-10-01 04:23:21.448433', 'step': 5372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:21.501768', 'step': 5372, 'epoch': 1} {'type': 'loss', 'content': 0.1532563418149948, 'timestamp': '2025-10-01 04:23:21.503925', 'step': 5373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:21.557041', 'step': 5373, 'epoch': 1} {'type': 'loss', 'content': 0.1415412724018097, 'timestamp': '2025-10-01 04:23:21.560455', 'step': 5374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:21.618117', 'step': 5374, 'epoch': 1} {'type': 'loss', 'content': 0.08486852049827576, 'timestamp': '2025-10-01 04:23:21.620306', 'step': 5375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:21.676441', 'step': 5375, 'epoch': 1} {'type': 'loss', 'content': 0.13922472298145294, 'timestamp': '2025-10-01 04:23:21.682237', 'step': 5376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:21.735218', 'step': 5376, 'epoch': 1} {'type': 'loss', 'content': 0.1277225911617279, 'timestamp': '2025-10-01 04:23:21.738228', 'step': 5377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:21.792347', 'step': 5377, 'epoch': 1} {'type': 'loss', 'content': 0.14178742468357086, 'timestamp': '2025-10-01 04:23:21.795266', 'step': 5378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:21.849320', 'step': 5378, 'epoch': 1} {'type': 'loss', 'content': 0.2210008203983307, 'timestamp': '2025-10-01 04:23:21.852571', 'step': 5379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:21.906343', 'step': 5379, 'epoch': 1} {'type': 'loss', 'content': 0.1876409947872162, 'timestamp': '2025-10-01 04:23:21.912832', 'step': 5380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:21.965439', 'step': 5380, 'epoch': 1} {'type': 'loss', 'content': 0.11978979408740997, 'timestamp': '2025-10-01 04:23:21.967644', 'step': 5381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:22.021100', 'step': 5381, 'epoch': 1} {'type': 'loss', 'content': 0.12389923632144928, 'timestamp': '2025-10-01 04:23:22.023282', 'step': 5382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:22.076900', 'step': 5382, 'epoch': 1} {'type': 'loss', 'content': 0.10620220750570297, 'timestamp': '2025-10-01 04:23:22.079123', 'step': 5383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:22.132056', 'step': 5383, 'epoch': 1} {'type': 'loss', 'content': 0.17052505910396576, 'timestamp': '2025-10-01 04:23:22.137678', 'step': 5384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:22.189949', 'step': 5384, 'epoch': 1} {'type': 'loss', 'content': 0.12576523423194885, 'timestamp': '2025-10-01 04:23:22.192302', 'step': 5385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:22.245185', 'step': 5385, 'epoch': 1} {'type': 'loss', 'content': 0.11634963750839233, 'timestamp': '2025-10-01 04:23:22.247596', 'step': 5386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:22.300454', 'step': 5386, 'epoch': 1} {'type': 'loss', 'content': 0.15371930599212646, 'timestamp': '2025-10-01 04:23:22.302730', 'step': 5387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:22.356397', 'step': 5387, 'epoch': 1} {'type': 'loss', 'content': 0.1800394505262375, 'timestamp': '2025-10-01 04:23:22.374961', 'step': 5388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:22.427549', 'step': 5388, 'epoch': 1} {'type': 'loss', 'content': 0.1385023295879364, 'timestamp': '2025-10-01 04:23:22.429990', 'step': 5389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:22.482756', 'step': 5389, 'epoch': 1} {'type': 'loss', 'content': 0.19843566417694092, 'timestamp': '2025-10-01 04:23:22.485379', 'step': 5390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:22.538353', 'step': 5390, 'epoch': 1} {'type': 'loss', 'content': 0.1985245943069458, 'timestamp': '2025-10-01 04:23:22.540714', 'step': 5391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:22.593731', 'step': 5391, 'epoch': 1} {'type': 'loss', 'content': 0.24143080413341522, 'timestamp': '2025-10-01 04:23:22.599590', 'step': 5392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:22.652116', 'step': 5392, 'epoch': 1} {'type': 'loss', 'content': 0.08717256784439087, 'timestamp': '2025-10-01 04:23:22.660418', 'step': 5393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:22.713810', 'step': 5393, 'epoch': 1} {'type': 'loss', 'content': 0.13189950585365295, 'timestamp': '2025-10-01 04:23:22.716374', 'step': 5394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:22.769729', 'step': 5394, 'epoch': 1} {'type': 'loss', 'content': 0.23131176829338074, 'timestamp': '2025-10-01 04:23:22.772005', 'step': 5395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:22.824874', 'step': 5395, 'epoch': 1} {'type': 'loss', 'content': 0.2164231240749359, 'timestamp': '2025-10-01 04:23:22.830631', 'step': 5396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:22.883359', 'step': 5396, 'epoch': 1} {'type': 'loss', 'content': 0.12050121277570724, 'timestamp': '2025-10-01 04:23:22.885658', 'step': 5397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:22.938841', 'step': 5397, 'epoch': 1} {'type': 'loss', 'content': 0.1950882226228714, 'timestamp': '2025-10-01 04:23:22.940856', 'step': 5398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:22.994206', 'step': 5398, 'epoch': 1} {'type': 'loss', 'content': 0.2113581746816635, 'timestamp': '2025-10-01 04:23:22.996479', 'step': 5399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:23.050055', 'step': 5399, 'epoch': 1} {'type': 'loss', 'content': 0.1826688051223755, 'timestamp': '2025-10-01 04:23:23.056169', 'step': 5400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:23.110755', 'step': 5400, 'epoch': 1} {'type': 'loss', 'content': 0.12657248973846436, 'timestamp': '2025-10-01 04:23:23.112978', 'step': 5401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:23.166156', 'step': 5401, 'epoch': 1} {'type': 'loss', 'content': 0.15103459358215332, 'timestamp': '2025-10-01 04:23:23.168272', 'step': 5402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:23.221000', 'step': 5402, 'epoch': 1} {'type': 'loss', 'content': 0.15398649871349335, 'timestamp': '2025-10-01 04:23:23.223087', 'step': 5403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:23.276412', 'step': 5403, 'epoch': 1} {'type': 'loss', 'content': 0.1367117166519165, 'timestamp': '2025-10-01 04:23:23.282250', 'step': 5404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:23.335124', 'step': 5404, 'epoch': 1} {'type': 'loss', 'content': 0.11969860643148422, 'timestamp': '2025-10-01 04:23:23.337179', 'step': 5405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:23.389829', 'step': 5405, 'epoch': 1} {'type': 'loss', 'content': 0.19339315593242645, 'timestamp': '2025-10-01 04:23:23.391969', 'step': 5406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:23.445631', 'step': 5406, 'epoch': 1} {'type': 'loss', 'content': 0.17471933364868164, 'timestamp': '2025-10-01 04:23:23.447865', 'step': 5407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:23.500529', 'step': 5407, 'epoch': 1} {'type': 'loss', 'content': 0.13016776740550995, 'timestamp': '2025-10-01 04:23:23.506218', 'step': 5408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:23.558944', 'step': 5408, 'epoch': 1} {'type': 'loss', 'content': 0.10167311131954193, 'timestamp': '2025-10-01 04:23:23.561326', 'step': 5409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:23.614264', 'step': 5409, 'epoch': 1} {'type': 'loss', 'content': 0.11723239719867706, 'timestamp': '2025-10-01 04:23:23.616462', 'step': 5410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:23.669797', 'step': 5410, 'epoch': 1} {'type': 'loss', 'content': 0.1559939980506897, 'timestamp': '2025-10-01 04:23:23.672484', 'step': 5411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:23:23.726166', 'step': 5411, 'epoch': 1} {'type': 'loss', 'content': 0.21899062395095825, 'timestamp': '2025-10-01 04:23:23.731514', 'step': 5412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:23.783723', 'step': 5412, 'epoch': 1} {'type': 'loss', 'content': 0.1437450796365738, 'timestamp': '2025-10-01 04:23:23.785933', 'step': 5413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:23.838555', 'step': 5413, 'epoch': 1} {'type': 'loss', 'content': 0.12754201889038086, 'timestamp': '2025-10-01 04:23:23.840663', 'step': 5414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:23.898350', 'step': 5414, 'epoch': 1} {'type': 'loss', 'content': 0.18946035206317902, 'timestamp': '2025-10-01 04:23:23.900697', 'step': 5415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:23.953936', 'step': 5415, 'epoch': 1} {'type': 'loss', 'content': 0.24811674654483795, 'timestamp': '2025-10-01 04:23:23.959642', 'step': 5416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:24.011593', 'step': 5416, 'epoch': 1} {'type': 'loss', 'content': 0.16957248747348785, 'timestamp': '2025-10-01 04:23:24.013784', 'step': 5417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:24.066783', 'step': 5417, 'epoch': 1} {'type': 'loss', 'content': 0.21094045042991638, 'timestamp': '2025-10-01 04:23:24.069383', 'step': 5418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:24.129121', 'step': 5418, 'epoch': 1} {'type': 'loss', 'content': 0.12227050960063934, 'timestamp': '2025-10-01 04:23:24.131279', 'step': 5419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:24.184538', 'step': 5419, 'epoch': 1} {'type': 'loss', 'content': 0.12672750651836395, 'timestamp': '2025-10-01 04:23:24.190455', 'step': 5420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:24.243077', 'step': 5420, 'epoch': 1} {'type': 'loss', 'content': 0.13054713606834412, 'timestamp': '2025-10-01 04:23:24.245507', 'step': 5421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:24.298834', 'step': 5421, 'epoch': 1} {'type': 'loss', 'content': 0.22838670015335083, 'timestamp': '2025-10-01 04:23:24.300850', 'step': 5422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:24.354208', 'step': 5422, 'epoch': 1} {'type': 'loss', 'content': 0.14128781855106354, 'timestamp': '2025-10-01 04:23:24.356538', 'step': 5423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:24.409726', 'step': 5423, 'epoch': 1} {'type': 'loss', 'content': 0.194962739944458, 'timestamp': '2025-10-01 04:23:24.415653', 'step': 5424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:24.468465', 'step': 5424, 'epoch': 1} {'type': 'loss', 'content': 0.1558847278356552, 'timestamp': '2025-10-01 04:23:24.470680', 'step': 5425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:24.523958', 'step': 5425, 'epoch': 1} {'type': 'loss', 'content': 0.10305280983448029, 'timestamp': '2025-10-01 04:23:24.526183', 'step': 5426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:24.579827', 'step': 5426, 'epoch': 1} {'type': 'loss', 'content': 0.09909386187791824, 'timestamp': '2025-10-01 04:23:24.589320', 'step': 5427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:24.642903', 'step': 5427, 'epoch': 1} {'type': 'loss', 'content': 0.25613296031951904, 'timestamp': '2025-10-01 04:23:24.648617', 'step': 5428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:24.701485', 'step': 5428, 'epoch': 1} {'type': 'loss', 'content': 0.10620573163032532, 'timestamp': '2025-10-01 04:23:24.703987', 'step': 5429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:24.757576', 'step': 5429, 'epoch': 1} {'type': 'loss', 'content': 0.11400894075632095, 'timestamp': '2025-10-01 04:23:24.759804', 'step': 5430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:24.813172', 'step': 5430, 'epoch': 1} {'type': 'loss', 'content': 0.18945232033729553, 'timestamp': '2025-10-01 04:23:24.815448', 'step': 5431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:24.869793', 'step': 5431, 'epoch': 1} {'type': 'loss', 'content': 0.12734031677246094, 'timestamp': '2025-10-01 04:23:24.875472', 'step': 5432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:24.928636', 'step': 5432, 'epoch': 1} {'type': 'loss', 'content': 0.1396757960319519, 'timestamp': '2025-10-01 04:23:24.930866', 'step': 5433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:24.983823', 'step': 5433, 'epoch': 1} {'type': 'loss', 'content': 0.18923106789588928, 'timestamp': '2025-10-01 04:23:24.985988', 'step': 5434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:25.057594', 'step': 5434, 'epoch': 1} {'type': 'loss', 'content': 0.2756684720516205, 'timestamp': '2025-10-01 04:23:25.059870', 'step': 5435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:25.112042', 'step': 5435, 'epoch': 1} {'type': 'loss', 'content': 0.14835378527641296, 'timestamp': '2025-10-01 04:23:25.117705', 'step': 5436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:25.170285', 'step': 5436, 'epoch': 1} {'type': 'loss', 'content': 0.15922312438488007, 'timestamp': '2025-10-01 04:23:25.174028', 'step': 5437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:25.228607', 'step': 5437, 'epoch': 1} {'type': 'loss', 'content': 0.15790611505508423, 'timestamp': '2025-10-01 04:23:25.230862', 'step': 5438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:25.284287', 'step': 5438, 'epoch': 1} {'type': 'loss', 'content': 0.1428927183151245, 'timestamp': '2025-10-01 04:23:25.286526', 'step': 5439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:25.339213', 'step': 5439, 'epoch': 1} {'type': 'loss', 'content': 0.12934981286525726, 'timestamp': '2025-10-01 04:23:25.345034', 'step': 5440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:25.399540', 'step': 5440, 'epoch': 1} {'type': 'loss', 'content': 0.21507331728935242, 'timestamp': '2025-10-01 04:23:25.401886', 'step': 5441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:25.455011', 'step': 5441, 'epoch': 1} {'type': 'loss', 'content': 0.1824280023574829, 'timestamp': '2025-10-01 04:23:25.457175', 'step': 5442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:25.510386', 'step': 5442, 'epoch': 1} {'type': 'loss', 'content': 0.14885982871055603, 'timestamp': '2025-10-01 04:23:25.513581', 'step': 5443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:25.567232', 'step': 5443, 'epoch': 1} {'type': 'loss', 'content': 0.12894375622272491, 'timestamp': '2025-10-01 04:23:25.573452', 'step': 5444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:25.625677', 'step': 5444, 'epoch': 1} {'type': 'loss', 'content': 0.15566378831863403, 'timestamp': '2025-10-01 04:23:25.627669', 'step': 5445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:25.691678', 'step': 5445, 'epoch': 1} {'type': 'loss', 'content': 0.13709531724452972, 'timestamp': '2025-10-01 04:23:25.695474', 'step': 5446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:25.748174', 'step': 5446, 'epoch': 1} {'type': 'loss', 'content': 0.16487304866313934, 'timestamp': '2025-10-01 04:23:25.750280', 'step': 5447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:25.813793', 'step': 5447, 'epoch': 1} {'type': 'loss', 'content': 0.1618683934211731, 'timestamp': '2025-10-01 04:23:25.819542', 'step': 5448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:25.875475', 'step': 5448, 'epoch': 1} {'type': 'loss', 'content': 0.1916523426771164, 'timestamp': '2025-10-01 04:23:25.877934', 'step': 5449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:25.937937', 'step': 5449, 'epoch': 1} {'type': 'loss', 'content': 0.13754060864448547, 'timestamp': '2025-10-01 04:23:25.940140', 'step': 5450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:25.997825', 'step': 5450, 'epoch': 1} {'type': 'loss', 'content': 0.26724982261657715, 'timestamp': '2025-10-01 04:23:26.000203', 'step': 5451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:26.054133', 'step': 5451, 'epoch': 1} {'type': 'loss', 'content': 0.13815174996852875, 'timestamp': '2025-10-01 04:23:26.059795', 'step': 5452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:23:26.112412', 'step': 5452, 'epoch': 1} {'type': 'loss', 'content': 0.1590348780155182, 'timestamp': '2025-10-01 04:23:26.114734', 'step': 5453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:26.168029', 'step': 5453, 'epoch': 1} {'type': 'loss', 'content': 0.25841036438941956, 'timestamp': '2025-10-01 04:23:26.171652', 'step': 5454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:26.225052', 'step': 5454, 'epoch': 1} {'type': 'loss', 'content': 0.13924308121204376, 'timestamp': '2025-10-01 04:23:26.227195', 'step': 5455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:26.280035', 'step': 5455, 'epoch': 1} {'type': 'loss', 'content': 0.08525239676237106, 'timestamp': '2025-10-01 04:23:26.285833', 'step': 5456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:26.338453', 'step': 5456, 'epoch': 1} {'type': 'loss', 'content': 0.10880225896835327, 'timestamp': '2025-10-01 04:23:26.340693', 'step': 5457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:26.404986', 'step': 5457, 'epoch': 1} {'type': 'loss', 'content': 0.2454308420419693, 'timestamp': '2025-10-01 04:23:26.407300', 'step': 5458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:26.461075', 'step': 5458, 'epoch': 1} {'type': 'loss', 'content': 0.1438523530960083, 'timestamp': '2025-10-01 04:23:26.465210', 'step': 5459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:26.522376', 'step': 5459, 'epoch': 1} {'type': 'loss', 'content': 0.19698283076286316, 'timestamp': '2025-10-01 04:23:26.528275', 'step': 5460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:26.581053', 'step': 5460, 'epoch': 1} {'type': 'loss', 'content': 0.13950873911380768, 'timestamp': '2025-10-01 04:23:26.583026', 'step': 5461, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:26.636095', 'step': 5461, 'epoch': 1} {'type': 'loss', 'content': 0.14295057952404022, 'timestamp': '2025-10-01 04:23:26.638284', 'step': 5462, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:26.693896', 'step': 5462, 'epoch': 1} {'type': 'loss', 'content': 0.162301704287529, 'timestamp': '2025-10-01 04:23:26.696169', 'step': 5463, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:26.749113', 'step': 5463, 'epoch': 1} {'type': 'loss', 'content': 0.09104263782501221, 'timestamp': '2025-10-01 04:23:26.754947', 'step': 5464, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:26.807379', 'step': 5464, 'epoch': 1} {'type': 'loss', 'content': 0.08599983155727386, 'timestamp': '2025-10-01 04:23:26.809597', 'step': 5465, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:26.862574', 'step': 5465, 'epoch': 1} {'type': 'loss', 'content': 0.18624354898929596, 'timestamp': '2025-10-01 04:23:26.864592', 'step': 5466, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:26.918595', 'step': 5466, 'epoch': 1} {'type': 'loss', 'content': 0.15946684777736664, 'timestamp': '2025-10-01 04:23:26.921380', 'step': 5467, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:26.974686', 'step': 5467, 'epoch': 1} {'type': 'loss', 'content': 0.24357549846172333, 'timestamp': '2025-10-01 04:23:26.980548', 'step': 5468, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:27.033479', 'step': 5468, 'epoch': 1} {'type': 'loss', 'content': 0.12586122751235962, 'timestamp': '2025-10-01 04:23:27.039574', 'step': 5469, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:27.097403', 'step': 5469, 'epoch': 1} {'type': 'loss', 'content': 0.2536831796169281, 'timestamp': '2025-10-01 04:23:27.100132', 'step': 5470, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:27.156041', 'step': 5470, 'epoch': 1} {'type': 'loss', 'content': 0.1449500322341919, 'timestamp': '2025-10-01 04:23:27.158283', 'step': 5471, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:27.212128', 'step': 5471, 'epoch': 1} {'type': 'loss', 'content': 0.16310487687587738, 'timestamp': '2025-10-01 04:23:27.218585', 'step': 5472, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:27.273052', 'step': 5472, 'epoch': 1} {'type': 'loss', 'content': 0.1434922069311142, 'timestamp': '2025-10-01 04:23:27.275514', 'step': 5473, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:27.330255', 'step': 5473, 'epoch': 1} {'type': 'loss', 'content': 0.18042176961898804, 'timestamp': '2025-10-01 04:23:27.332977', 'step': 5474, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:27.388213', 'step': 5474, 'epoch': 1} {'type': 'loss', 'content': 0.10074310004711151, 'timestamp': '2025-10-01 04:23:27.391111', 'step': 5475, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:27.445378', 'step': 5475, 'epoch': 1} {'type': 'loss', 'content': 0.10066230595111847, 'timestamp': '2025-10-01 04:23:27.451959', 'step': 5476, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:27.505305', 'step': 5476, 'epoch': 1} {'type': 'loss', 'content': 0.23998993635177612, 'timestamp': '2025-10-01 04:23:27.508075', 'step': 5477, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:27.562268', 'step': 5477, 'epoch': 1} {'type': 'loss', 'content': 0.2422831952571869, 'timestamp': '2025-10-01 04:23:27.566385', 'step': 5478, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:27.626432', 'step': 5478, 'epoch': 1} {'type': 'loss', 'content': 0.15783695876598358, 'timestamp': '2025-10-01 04:23:27.629519', 'step': 5479, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:27.683768', 'step': 5479, 'epoch': 1} {'type': 'loss', 'content': 0.10727459192276001, 'timestamp': '2025-10-01 04:23:27.690192', 'step': 5480, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:27.743857', 'step': 5480, 'epoch': 1} {'type': 'loss', 'content': 0.20687560737133026, 'timestamp': '2025-10-01 04:23:27.746363', 'step': 5481, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:27.823979', 'step': 5481, 'epoch': 1} {'type': 'loss', 'content': 0.10978569835424423, 'timestamp': '2025-10-01 04:23:27.826495', 'step': 5482, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:27.885668', 'step': 5482, 'epoch': 1} {'type': 'loss', 'content': 0.15092270076274872, 'timestamp': '2025-10-01 04:23:27.890032', 'step': 5483, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:27.944750', 'step': 5483, 'epoch': 1} {'type': 'loss', 'content': 0.17887718975543976, 'timestamp': '2025-10-01 04:23:27.953266', 'step': 5484, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:28.007946', 'step': 5484, 'epoch': 1} {'type': 'loss', 'content': 0.1031496524810791, 'timestamp': '2025-10-01 04:23:28.010401', 'step': 5485, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:28.064372', 'step': 5485, 'epoch': 1} {'type': 'loss', 'content': 0.2362966537475586, 'timestamp': '2025-10-01 04:23:28.068902', 'step': 5486, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:28.126726', 'step': 5486, 'epoch': 1} {'type': 'loss', 'content': 0.16219580173492432, 'timestamp': '2025-10-01 04:23:28.129439', 'step': 5487, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:28.185125', 'step': 5487, 'epoch': 1} {'type': 'loss', 'content': 0.15845318138599396, 'timestamp': '2025-10-01 04:23:28.191615', 'step': 5488, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:28.249639', 'step': 5488, 'epoch': 1} {'type': 'loss', 'content': 0.09212936460971832, 'timestamp': '2025-10-01 04:23:28.252234', 'step': 5489, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:28.306402', 'step': 5489, 'epoch': 1} {'type': 'loss', 'content': 0.14239872992038727, 'timestamp': '2025-10-01 04:23:28.309234', 'step': 5490, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:28.363923', 'step': 5490, 'epoch': 1} {'type': 'loss', 'content': 0.09707076102495193, 'timestamp': '2025-10-01 04:23:28.366580', 'step': 5491, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:28.420779', 'step': 5491, 'epoch': 1} {'type': 'loss', 'content': 0.10786443203687668, 'timestamp': '2025-10-01 04:23:28.426905', 'step': 5492, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:28.480391', 'step': 5492, 'epoch': 1} {'type': 'loss', 'content': 0.18550363183021545, 'timestamp': '2025-10-01 04:23:28.483249', 'step': 5493, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:28.537973', 'step': 5493, 'epoch': 1} {'type': 'loss', 'content': 0.13822966814041138, 'timestamp': '2025-10-01 04:23:28.540416', 'step': 5494, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:28.595218', 'step': 5494, 'epoch': 1} {'type': 'loss', 'content': 0.14950457215309143, 'timestamp': '2025-10-01 04:23:28.597349', 'step': 5495, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:28.650190', 'step': 5495, 'epoch': 1} {'type': 'loss', 'content': 0.21583041548728943, 'timestamp': '2025-10-01 04:23:28.656031', 'step': 5496, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:28.713094', 'step': 5496, 'epoch': 1} {'type': 'loss', 'content': 0.12040333449840546, 'timestamp': '2025-10-01 04:23:28.715231', 'step': 5497, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:28.768333', 'step': 5497, 'epoch': 1} {'type': 'loss', 'content': 0.14157506823539734, 'timestamp': '2025-10-01 04:23:28.774082', 'step': 5498, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:28.827842', 'step': 5498, 'epoch': 1} {'type': 'loss', 'content': 0.16501647233963013, 'timestamp': '2025-10-01 04:23:28.830112', 'step': 5499, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:23:28.886839', 'step': 5499, 'epoch': 1} {'type': 'loss', 'content': 0.20005419850349426, 'timestamp': '2025-10-01 04:23:28.892680', 'step': 5500, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 5500', 'timestamp': '2025-10-01 04:23:29.298376', 'step': 5500, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:29.350982', 'step': 5500, 'epoch': 1} {'type': 'loss', 'content': 0.18805482983589172, 'timestamp': '2025-10-01 04:23:29.353668', 'step': 5501, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:29.408192', 'step': 5501, 'epoch': 1} {'type': 'loss', 'content': 0.28649675846099854, 'timestamp': '2025-10-01 04:23:29.410482', 'step': 5502, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:29.464221', 'step': 5502, 'epoch': 1} {'type': 'loss', 'content': 0.036088645458221436, 'timestamp': '2025-10-01 04:23:29.467828', 'step': 5503, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:29.523609', 'step': 5503, 'epoch': 1} {'type': 'loss', 'content': 0.11888420581817627, 'timestamp': '2025-10-01 04:23:29.529737', 'step': 5504, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:23:29.584340', 'step': 5504, 'epoch': 1} {'type': 'loss', 'content': 0.12003594636917114, 'timestamp': '2025-10-01 04:23:29.586973', 'step': 5505, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:23:29.640820', 'step': 5505, 'epoch': 1} {'type': 'loss', 'content': 0.20750880241394043, 'timestamp': '2025-10-01 04:23:29.643293', 'step': 5506, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:29.696601', 'step': 5506, 'epoch': 1} {'type': 'loss', 'content': 0.19356811046600342, 'timestamp': '2025-10-01 04:23:29.698786', 'step': 5507, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:29.751878', 'step': 5507, 'epoch': 1} {'type': 'loss', 'content': 0.1701977550983429, 'timestamp': '2025-10-01 04:23:29.762521', 'step': 5508, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:29.818742', 'step': 5508, 'epoch': 1} {'type': 'loss', 'content': 0.1533031165599823, 'timestamp': '2025-10-01 04:23:29.827271', 'step': 5509, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:29.894008', 'step': 5509, 'epoch': 1} {'type': 'loss', 'content': 0.1797351837158203, 'timestamp': '2025-10-01 04:23:29.898405', 'step': 5510, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:29.954238', 'step': 5510, 'epoch': 1} {'type': 'loss', 'content': 0.188677579164505, 'timestamp': '2025-10-01 04:23:29.959898', 'step': 5511, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:30.020627', 'step': 5511, 'epoch': 1} {'type': 'loss', 'content': 0.1331336349248886, 'timestamp': '2025-10-01 04:23:30.028207', 'step': 5512, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:23:30.081053', 'step': 5512, 'epoch': 1} {'type': 'loss', 'content': 0.21082958579063416, 'timestamp': '2025-10-01 04:23:30.086588', 'step': 5513, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:30.142484', 'step': 5513, 'epoch': 1} {'type': 'loss', 'content': 0.14891080558300018, 'timestamp': '2025-10-01 04:23:30.145822', 'step': 5514, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:30.198892', 'step': 5514, 'epoch': 1} {'type': 'loss', 'content': 0.1478736847639084, 'timestamp': '2025-10-01 04:23:30.201203', 'step': 5515, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:30.255860', 'step': 5515, 'epoch': 1} {'type': 'loss', 'content': 0.10267353802919388, 'timestamp': '2025-10-01 04:23:30.262702', 'step': 5516, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:30.318650', 'step': 5516, 'epoch': 1} {'type': 'loss', 'content': 0.1408478021621704, 'timestamp': '2025-10-01 04:23:30.320846', 'step': 5517, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:30.375473', 'step': 5517, 'epoch': 1} {'type': 'loss', 'content': 0.17766815423965454, 'timestamp': '2025-10-01 04:23:30.377767', 'step': 5518, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:30.434820', 'step': 5518, 'epoch': 1} {'type': 'loss', 'content': 0.11892686784267426, 'timestamp': '2025-10-01 04:23:30.437628', 'step': 5519, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:30.500888', 'step': 5519, 'epoch': 1} {'type': 'loss', 'content': 0.11222153902053833, 'timestamp': '2025-10-01 04:23:30.507046', 'step': 5520, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:30.561181', 'step': 5520, 'epoch': 1} {'type': 'loss', 'content': 0.19437557458877563, 'timestamp': '2025-10-01 04:23:30.563453', 'step': 5521, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:30.616629', 'step': 5521, 'epoch': 1} {'type': 'loss', 'content': 0.10506114363670349, 'timestamp': '2025-10-01 04:23:30.620568', 'step': 5522, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:23:30.675798', 'step': 5522, 'epoch': 1} {'type': 'loss', 'content': 0.16394536197185516, 'timestamp': '2025-10-01 04:23:30.680322', 'step': 5523, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:30.733555', 'step': 5523, 'epoch': 1} {'type': 'loss', 'content': 0.1624411791563034, 'timestamp': '2025-10-01 04:23:30.739508', 'step': 5524, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:30.801948', 'step': 5524, 'epoch': 1} {'type': 'loss', 'content': 0.10365542769432068, 'timestamp': '2025-10-01 04:23:30.804515', 'step': 5525, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-01 04:23:30.858363', 'step': 5525, 'epoch': 1} {'type': 'loss', 'content': 0.2378460168838501, 'timestamp': '2025-10-01 04:23:30.860652', 'step': 5526, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:23:30.918853', 'step': 5526, 'epoch': 1} {'type': 'loss', 'content': 0.1750597059726715, 'timestamp': '2025-10-01 04:23:30.921173', 'step': 5527, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:30.976668', 'step': 5527, 'epoch': 1} {'type': 'loss', 'content': 0.1681739240884781, 'timestamp': '2025-10-01 04:23:30.982551', 'step': 5528, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:31.037571', 'step': 5528, 'epoch': 1} {'type': 'loss', 'content': 0.09323158860206604, 'timestamp': '2025-10-01 04:23:31.040090', 'step': 5529, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:31.093438', 'step': 5529, 'epoch': 1} {'type': 'loss', 'content': 0.1509637087583542, 'timestamp': '2025-10-01 04:23:31.095722', 'step': 5530, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:31.149118', 'step': 5530, 'epoch': 1} {'type': 'loss', 'content': 0.15110185742378235, 'timestamp': '2025-10-01 04:23:31.151283', 'step': 5531, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:31.204044', 'step': 5531, 'epoch': 1} {'type': 'loss', 'content': 0.23145878314971924, 'timestamp': '2025-10-01 04:23:31.209985', 'step': 5532, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:31.263000', 'step': 5532, 'epoch': 1} {'type': 'loss', 'content': 0.1538403481245041, 'timestamp': '2025-10-01 04:23:31.265160', 'step': 5533, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:31.319123', 'step': 5533, 'epoch': 1} {'type': 'loss', 'content': 0.2582217752933502, 'timestamp': '2025-10-01 04:23:31.321575', 'step': 5534, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:23:31.377690', 'step': 5534, 'epoch': 1} {'type': 'loss', 'content': 0.18596038222312927, 'timestamp': '2025-10-01 04:23:31.379939', 'step': 5535, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:31.440855', 'step': 5535, 'epoch': 1} {'type': 'loss', 'content': 0.16132348775863647, 'timestamp': '2025-10-01 04:23:31.447983', 'step': 5536, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:31.505495', 'step': 5536, 'epoch': 1} {'type': 'loss', 'content': 0.14938542246818542, 'timestamp': '2025-10-01 04:23:31.507674', 'step': 5537, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:31.561408', 'step': 5537, 'epoch': 1} {'type': 'loss', 'content': 0.18948586285114288, 'timestamp': '2025-10-01 04:23:31.563654', 'step': 5538, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:23:31.616900', 'step': 5538, 'epoch': 1} {'type': 'loss', 'content': 0.18030621111392975, 'timestamp': '2025-10-01 04:23:31.619900', 'step': 5539, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:31.673636', 'step': 5539, 'epoch': 1} {'type': 'loss', 'content': 0.1590537279844284, 'timestamp': '2025-10-01 04:23:31.679383', 'step': 5540, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:23:31.732655', 'step': 5540, 'epoch': 1} {'type': 'loss', 'content': 0.1400965452194214, 'timestamp': '2025-10-01 04:23:31.734833', 'step': 5541, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:31.788346', 'step': 5541, 'epoch': 1} {'type': 'loss', 'content': 0.1395779699087143, 'timestamp': '2025-10-01 04:23:31.790428', 'step': 5542, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:31.843707', 'step': 5542, 'epoch': 1} {'type': 'loss', 'content': 0.14670312404632568, 'timestamp': '2025-10-01 04:23:31.847869', 'step': 5543, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:31.902614', 'step': 5543, 'epoch': 1} {'type': 'loss', 'content': 0.1656443029642105, 'timestamp': '2025-10-01 04:23:31.908215', 'step': 5544, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:31.960759', 'step': 5544, 'epoch': 1} {'type': 'loss', 'content': 0.11619008332490921, 'timestamp': '2025-10-01 04:23:31.962876', 'step': 5545, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:32.015627', 'step': 5545, 'epoch': 1} {'type': 'loss', 'content': 0.20206879079341888, 'timestamp': '2025-10-01 04:23:32.017779', 'step': 5546, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:32.071022', 'step': 5546, 'epoch': 1} {'type': 'loss', 'content': 0.15540587902069092, 'timestamp': '2025-10-01 04:23:32.073642', 'step': 5547, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:32.127481', 'step': 5547, 'epoch': 1} {'type': 'loss', 'content': 0.16751286387443542, 'timestamp': '2025-10-01 04:23:32.133237', 'step': 5548, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:32.185361', 'step': 5548, 'epoch': 1} {'type': 'loss', 'content': 0.16899549961090088, 'timestamp': '2025-10-01 04:23:32.187553', 'step': 5549, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:32.241142', 'step': 5549, 'epoch': 1} {'type': 'loss', 'content': 0.1538512110710144, 'timestamp': '2025-10-01 04:23:32.243359', 'step': 5550, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:32.299801', 'step': 5550, 'epoch': 1} {'type': 'loss', 'content': 0.2779597043991089, 'timestamp': '2025-10-01 04:23:32.303049', 'step': 5551, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:32.357181', 'step': 5551, 'epoch': 1} {'type': 'loss', 'content': 0.21831141412258148, 'timestamp': '2025-10-01 04:23:32.363285', 'step': 5552, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:32.416204', 'step': 5552, 'epoch': 1} {'type': 'loss', 'content': 0.28782618045806885, 'timestamp': '2025-10-01 04:23:32.418403', 'step': 5553, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:32.471585', 'step': 5553, 'epoch': 1} {'type': 'loss', 'content': 0.16890478134155273, 'timestamp': '2025-10-01 04:23:32.473776', 'step': 5554, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:32.530240', 'step': 5554, 'epoch': 1} {'type': 'loss', 'content': 0.14894436299800873, 'timestamp': '2025-10-01 04:23:32.540798', 'step': 5555, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:32.593693', 'step': 5555, 'epoch': 1} {'type': 'loss', 'content': 0.13598939776420593, 'timestamp': '2025-10-01 04:23:32.599515', 'step': 5556, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:32.667244', 'step': 5556, 'epoch': 1} {'type': 'loss', 'content': 0.2117665410041809, 'timestamp': '2025-10-01 04:23:32.670669', 'step': 5557, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:32.724190', 'step': 5557, 'epoch': 1} {'type': 'loss', 'content': 0.17604905366897583, 'timestamp': '2025-10-01 04:23:32.726881', 'step': 5558, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:32.780416', 'step': 5558, 'epoch': 1} {'type': 'loss', 'content': 0.13869021832942963, 'timestamp': '2025-10-01 04:23:32.782747', 'step': 5559, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:32.841939', 'step': 5559, 'epoch': 1} {'type': 'loss', 'content': 0.09139850735664368, 'timestamp': '2025-10-01 04:23:32.847864', 'step': 5560, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:32.900594', 'step': 5560, 'epoch': 1} {'type': 'loss', 'content': 0.11882305145263672, 'timestamp': '2025-10-01 04:23:32.902802', 'step': 5561, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:32.957620', 'step': 5561, 'epoch': 1} {'type': 'loss', 'content': 0.10655926913022995, 'timestamp': '2025-10-01 04:23:32.959749', 'step': 5562, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:23:33.013495', 'step': 5562, 'epoch': 1} {'type': 'loss', 'content': 0.2291230410337448, 'timestamp': '2025-10-01 04:23:33.015794', 'step': 5563, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:23:33.076241', 'step': 5563, 'epoch': 1} {'type': 'loss', 'content': 0.13009200990200043, 'timestamp': '2025-10-01 04:23:33.082016', 'step': 5564, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:23:33.136104', 'step': 5564, 'epoch': 1} {'type': 'loss', 'content': 0.16414035856723785, 'timestamp': '2025-10-01 04:23:33.140589', 'step': 5565, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:33.199284', 'step': 5565, 'epoch': 1} {'type': 'loss', 'content': 0.254854291677475, 'timestamp': '2025-10-01 04:23:33.206371', 'step': 5566, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:33.263455', 'step': 5566, 'epoch': 1} {'type': 'loss', 'content': 0.15586712956428528, 'timestamp': '2025-10-01 04:23:33.265771', 'step': 5567, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:33.319263', 'step': 5567, 'epoch': 1} {'type': 'loss', 'content': 0.1719435602426529, 'timestamp': '2025-10-01 04:23:33.327619', 'step': 5568, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:33.381285', 'step': 5568, 'epoch': 1} {'type': 'loss', 'content': 0.24489079415798187, 'timestamp': '2025-10-01 04:23:33.383600', 'step': 5569, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:33.441111', 'step': 5569, 'epoch': 1} {'type': 'loss', 'content': 0.14992965757846832, 'timestamp': '2025-10-01 04:23:33.443353', 'step': 5570, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:33.496761', 'step': 5570, 'epoch': 1} {'type': 'loss', 'content': 0.1953592449426651, 'timestamp': '2025-10-01 04:23:33.499982', 'step': 5571, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:23:33.554726', 'step': 5571, 'epoch': 1} {'type': 'loss', 'content': 0.11646975576877594, 'timestamp': '2025-10-01 04:23:33.560891', 'step': 5572, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:33.614554', 'step': 5572, 'epoch': 1} {'type': 'loss', 'content': 0.09132058173418045, 'timestamp': '2025-10-01 04:23:33.617411', 'step': 5573, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:33.671461', 'step': 5573, 'epoch': 1} {'type': 'loss', 'content': 0.1778118908405304, 'timestamp': '2025-10-01 04:23:33.673899', 'step': 5574, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:23:33.728842', 'step': 5574, 'epoch': 1} {'type': 'loss', 'content': 0.11955621838569641, 'timestamp': '2025-10-01 04:23:33.731377', 'step': 5575, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:33.785043', 'step': 5575, 'epoch': 1} {'type': 'loss', 'content': 0.1586485058069229, 'timestamp': '2025-10-01 04:23:33.790982', 'step': 5576, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:33.843354', 'step': 5576, 'epoch': 1} {'type': 'loss', 'content': 0.15940532088279724, 'timestamp': '2025-10-01 04:23:33.845639', 'step': 5577, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:33.900168', 'step': 5577, 'epoch': 1} {'type': 'loss', 'content': 0.13409550487995148, 'timestamp': '2025-10-01 04:23:33.902425', 'step': 5578, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:33.955796', 'step': 5578, 'epoch': 1} {'type': 'loss', 'content': 0.11554200202226639, 'timestamp': '2025-10-01 04:23:33.958051', 'step': 5579, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:23:34.011803', 'step': 5579, 'epoch': 1} {'type': 'loss', 'content': 0.18821899592876434, 'timestamp': '2025-10-01 04:23:34.017866', 'step': 5580, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:34.072276', 'step': 5580, 'epoch': 1} {'type': 'loss', 'content': 0.12323078513145447, 'timestamp': '2025-10-01 04:23:34.074417', 'step': 5581, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:34.130505', 'step': 5581, 'epoch': 1} {'type': 'loss', 'content': 0.11946015059947968, 'timestamp': '2025-10-01 04:23:34.132708', 'step': 5582, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:34.186208', 'step': 5582, 'epoch': 1} {'type': 'loss', 'content': 0.11880335956811905, 'timestamp': '2025-10-01 04:23:34.188414', 'step': 5583, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:34.241648', 'step': 5583, 'epoch': 1} {'type': 'loss', 'content': 0.14761526882648468, 'timestamp': '2025-10-01 04:23:34.247577', 'step': 5584, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:34.299655', 'step': 5584, 'epoch': 1} {'type': 'loss', 'content': 0.2544096112251282, 'timestamp': '2025-10-01 04:23:34.301826', 'step': 5585, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:34.356183', 'step': 5585, 'epoch': 1} {'type': 'loss', 'content': 0.08495844155550003, 'timestamp': '2025-10-01 04:23:34.358353', 'step': 5586, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:23:34.412066', 'step': 5586, 'epoch': 1} {'type': 'loss', 'content': 0.09065370261669159, 'timestamp': '2025-10-01 04:23:34.414609', 'step': 5587, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:34.468867', 'step': 5587, 'epoch': 1} {'type': 'loss', 'content': 0.11138596385717392, 'timestamp': '2025-10-01 04:23:34.475045', 'step': 5588, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:34.528103', 'step': 5588, 'epoch': 1} {'type': 'loss', 'content': 0.14960236847400665, 'timestamp': '2025-10-01 04:23:34.530913', 'step': 5589, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:34.595683', 'step': 5589, 'epoch': 1} {'type': 'loss', 'content': 0.098357655107975, 'timestamp': '2025-10-01 04:23:34.598059', 'step': 5590, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:34.651436', 'step': 5590, 'epoch': 1} {'type': 'loss', 'content': 0.20690667629241943, 'timestamp': '2025-10-01 04:23:34.654119', 'step': 5591, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:34.706865', 'step': 5591, 'epoch': 1} {'type': 'loss', 'content': 0.09176946431398392, 'timestamp': '2025-10-01 04:23:34.713091', 'step': 5592, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-01 04:23:48.176289', 'step': 5592, 'epoch': 1} {'type': 'pplx', 'content': 12861.066118053597, 'timestamp': '2025-10-01 04:23:48.179867', 'step': 5592, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:48.242793', 'step': 5592, 'epoch': 1} {'type': 'loss', 'content': 0.1447424441576004, 'timestamp': '2025-10-01 04:23:48.244843', 'step': 5593, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:48.300644', 'step': 5593, 'epoch': 1} {'type': 'loss', 'content': 0.13060688972473145, 'timestamp': '2025-10-01 04:23:48.302988', 'step': 5594, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:48.356788', 'step': 5594, 'epoch': 1} {'type': 'loss', 'content': 0.1815834641456604, 'timestamp': '2025-10-01 04:23:48.359007', 'step': 5595, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:48.412359', 'step': 5595, 'epoch': 1} {'type': 'loss', 'content': 0.18259482085704803, 'timestamp': '2025-10-01 04:23:48.418775', 'step': 5596, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:48.471574', 'step': 5596, 'epoch': 1} {'type': 'loss', 'content': 0.3004632890224457, 'timestamp': '2025-10-01 04:23:48.473998', 'step': 5597, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:48.527383', 'step': 5597, 'epoch': 1} {'type': 'loss', 'content': 0.11469345539808273, 'timestamp': '2025-10-01 04:23:48.530070', 'step': 5598, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:48.586032', 'step': 5598, 'epoch': 1} {'type': 'loss', 'content': 0.15171191096305847, 'timestamp': '2025-10-01 04:23:48.588101', 'step': 5599, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:48.641524', 'step': 5599, 'epoch': 1} {'type': 'loss', 'content': 0.1633724719285965, 'timestamp': '2025-10-01 04:23:48.648068', 'step': 5600, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:48.701874', 'step': 5600, 'epoch': 1} {'type': 'loss', 'content': 0.18481990694999695, 'timestamp': '2025-10-01 04:23:48.710360', 'step': 5601, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:48.765239', 'step': 5601, 'epoch': 1} {'type': 'loss', 'content': 0.1751297265291214, 'timestamp': '2025-10-01 04:23:48.767286', 'step': 5602, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:48.829281', 'step': 5602, 'epoch': 1} {'type': 'loss', 'content': 0.2243071049451828, 'timestamp': '2025-10-01 04:23:48.838111', 'step': 5603, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:48.893113', 'step': 5603, 'epoch': 1} {'type': 'loss', 'content': 0.1816936731338501, 'timestamp': '2025-10-01 04:23:48.899085', 'step': 5604, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:48.951949', 'step': 5604, 'epoch': 1} {'type': 'loss', 'content': 0.1644357591867447, 'timestamp': '2025-10-01 04:23:48.954364', 'step': 5605, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:49.008443', 'step': 5605, 'epoch': 1} {'type': 'loss', 'content': 0.06980818510055542, 'timestamp': '2025-10-01 04:23:49.010523', 'step': 5606, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:49.071930', 'step': 5606, 'epoch': 1} {'type': 'loss', 'content': 0.17518165707588196, 'timestamp': '2025-10-01 04:23:49.075596', 'step': 5607, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:49.128131', 'step': 5607, 'epoch': 1} {'type': 'loss', 'content': 0.2174854874610901, 'timestamp': '2025-10-01 04:23:49.134249', 'step': 5608, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:49.186739', 'step': 5608, 'epoch': 1} {'type': 'loss', 'content': 0.13645052909851074, 'timestamp': '2025-10-01 04:23:49.188934', 'step': 5609, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:49.241982', 'step': 5609, 'epoch': 1} {'type': 'loss', 'content': 0.10787341743707657, 'timestamp': '2025-10-01 04:23:49.244095', 'step': 5610, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:49.297449', 'step': 5610, 'epoch': 1} {'type': 'loss', 'content': 0.0868447870016098, 'timestamp': '2025-10-01 04:23:49.299775', 'step': 5611, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:23:49.352873', 'step': 5611, 'epoch': 1} {'type': 'loss', 'content': 0.15571948885917664, 'timestamp': '2025-10-01 04:23:49.359262', 'step': 5612, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:49.413042', 'step': 5612, 'epoch': 1} {'type': 'loss', 'content': 0.13064102828502655, 'timestamp': '2025-10-01 04:23:49.418545', 'step': 5613, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:49.471470', 'step': 5613, 'epoch': 1} {'type': 'loss', 'content': 0.17931652069091797, 'timestamp': '2025-10-01 04:23:49.473942', 'step': 5614, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:49.526800', 'step': 5614, 'epoch': 1} {'type': 'loss', 'content': 0.1553676426410675, 'timestamp': '2025-10-01 04:23:49.528895', 'step': 5615, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:49.581233', 'step': 5615, 'epoch': 1} {'type': 'loss', 'content': 0.1329643726348877, 'timestamp': '2025-10-01 04:23:49.587217', 'step': 5616, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:49.639779', 'step': 5616, 'epoch': 1} {'type': 'loss', 'content': 0.14923585951328278, 'timestamp': '2025-10-01 04:23:49.641872', 'step': 5617, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:49.694873', 'step': 5617, 'epoch': 1} {'type': 'loss', 'content': 0.2168567031621933, 'timestamp': '2025-10-01 04:23:49.697010', 'step': 5618, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:23:49.750094', 'step': 5618, 'epoch': 1} {'type': 'loss', 'content': 0.1337917447090149, 'timestamp': '2025-10-01 04:23:49.752043', 'step': 5619, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:49.805881', 'step': 5619, 'epoch': 1} {'type': 'loss', 'content': 0.18736079335212708, 'timestamp': '2025-10-01 04:23:49.811989', 'step': 5620, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:49.864529', 'step': 5620, 'epoch': 1} {'type': 'loss', 'content': 0.2732345759868622, 'timestamp': '2025-10-01 04:23:49.866725', 'step': 5621, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:49.925532', 'step': 5621, 'epoch': 1} {'type': 'loss', 'content': 0.1932319551706314, 'timestamp': '2025-10-01 04:23:49.927702', 'step': 5622, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:49.981441', 'step': 5622, 'epoch': 1} {'type': 'loss', 'content': 0.13805241882801056, 'timestamp': '2025-10-01 04:23:49.984614', 'step': 5623, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:50.037713', 'step': 5623, 'epoch': 1} {'type': 'loss', 'content': 0.17603632807731628, 'timestamp': '2025-10-01 04:23:50.043478', 'step': 5624, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:50.096146', 'step': 5624, 'epoch': 1} {'type': 'loss', 'content': 0.1977856457233429, 'timestamp': '2025-10-01 04:23:50.098397', 'step': 5625, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:50.151574', 'step': 5625, 'epoch': 1} {'type': 'loss', 'content': 0.22779060900211334, 'timestamp': '2025-10-01 04:23:50.153780', 'step': 5626, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:50.210948', 'step': 5626, 'epoch': 1} {'type': 'loss', 'content': 0.30696824193000793, 'timestamp': '2025-10-01 04:23:50.212943', 'step': 5627, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:50.266506', 'step': 5627, 'epoch': 1} {'type': 'loss', 'content': 0.15701080858707428, 'timestamp': '2025-10-01 04:23:50.272301', 'step': 5628, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:50.328663', 'step': 5628, 'epoch': 1} {'type': 'loss', 'content': 0.1807706505060196, 'timestamp': '2025-10-01 04:23:50.330569', 'step': 5629, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:50.390249', 'step': 5629, 'epoch': 1} {'type': 'loss', 'content': 0.23578386008739471, 'timestamp': '2025-10-01 04:23:50.392783', 'step': 5630, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:50.446230', 'step': 5630, 'epoch': 1} {'type': 'loss', 'content': 0.1829371452331543, 'timestamp': '2025-10-01 04:23:50.448463', 'step': 5631, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:50.501742', 'step': 5631, 'epoch': 1} {'type': 'loss', 'content': 0.16497072577476501, 'timestamp': '2025-10-01 04:23:50.508299', 'step': 5632, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:50.560908', 'step': 5632, 'epoch': 1} {'type': 'loss', 'content': 0.17760415375232697, 'timestamp': '2025-10-01 04:23:50.563116', 'step': 5633, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:50.616243', 'step': 5633, 'epoch': 1} {'type': 'loss', 'content': 0.17990925908088684, 'timestamp': '2025-10-01 04:23:50.618122', 'step': 5634, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:50.671036', 'step': 5634, 'epoch': 1} {'type': 'loss', 'content': 0.2190270572900772, 'timestamp': '2025-10-01 04:23:50.672950', 'step': 5635, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:50.735779', 'step': 5635, 'epoch': 1} {'type': 'loss', 'content': 0.18679501116275787, 'timestamp': '2025-10-01 04:23:50.747687', 'step': 5636, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:50.800183', 'step': 5636, 'epoch': 1} {'type': 'loss', 'content': 0.1586407572031021, 'timestamp': '2025-10-01 04:23:50.802307', 'step': 5637, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:23:50.856273', 'step': 5637, 'epoch': 1} {'type': 'loss', 'content': 0.1415058821439743, 'timestamp': '2025-10-01 04:23:50.858163', 'step': 5638, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:50.911366', 'step': 5638, 'epoch': 1} {'type': 'loss', 'content': 0.24887026846408844, 'timestamp': '2025-10-01 04:23:50.913622', 'step': 5639, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:50.966721', 'step': 5639, 'epoch': 1} {'type': 'loss', 'content': 0.16526637971401215, 'timestamp': '2025-10-01 04:23:50.973926', 'step': 5640, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:51.025951', 'step': 5640, 'epoch': 1} {'type': 'loss', 'content': 0.13471877574920654, 'timestamp': '2025-10-01 04:23:51.028168', 'step': 5641, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:51.081274', 'step': 5641, 'epoch': 1} {'type': 'loss', 'content': 0.2789897620677948, 'timestamp': '2025-10-01 04:23:51.083204', 'step': 5642, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:23:51.136357', 'step': 5642, 'epoch': 1} {'type': 'loss', 'content': 0.220552459359169, 'timestamp': '2025-10-01 04:23:51.138267', 'step': 5643, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:51.190846', 'step': 5643, 'epoch': 1} {'type': 'loss', 'content': 0.17507754266262054, 'timestamp': '2025-10-01 04:23:51.197622', 'step': 5644, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:51.250391', 'step': 5644, 'epoch': 1} {'type': 'loss', 'content': 0.1641038954257965, 'timestamp': '2025-10-01 04:23:51.252535', 'step': 5645, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:51.305228', 'step': 5645, 'epoch': 1} {'type': 'loss', 'content': 0.18542538583278656, 'timestamp': '2025-10-01 04:23:51.307519', 'step': 5646, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:51.362274', 'step': 5646, 'epoch': 1} {'type': 'loss', 'content': 0.09326139837503433, 'timestamp': '2025-10-01 04:23:51.365937', 'step': 5647, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:23:51.420908', 'step': 5647, 'epoch': 1} {'type': 'loss', 'content': 0.22190620005130768, 'timestamp': '2025-10-01 04:23:51.426413', 'step': 5648, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:51.479432', 'step': 5648, 'epoch': 1} {'type': 'loss', 'content': 0.17081104218959808, 'timestamp': '2025-10-01 04:23:51.482112', 'step': 5649, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:51.536132', 'step': 5649, 'epoch': 1} {'type': 'loss', 'content': 0.14524129033088684, 'timestamp': '2025-10-01 04:23:51.538180', 'step': 5650, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:51.593428', 'step': 5650, 'epoch': 1} {'type': 'loss', 'content': 0.11829987913370132, 'timestamp': '2025-10-01 04:23:51.595644', 'step': 5651, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:51.649022', 'step': 5651, 'epoch': 1} {'type': 'loss', 'content': 0.05883840098977089, 'timestamp': '2025-10-01 04:23:51.654836', 'step': 5652, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:51.707312', 'step': 5652, 'epoch': 1} {'type': 'loss', 'content': 0.10985156893730164, 'timestamp': '2025-10-01 04:23:51.710628', 'step': 5653, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:51.764117', 'step': 5653, 'epoch': 1} {'type': 'loss', 'content': 0.20593304932117462, 'timestamp': '2025-10-01 04:23:51.766618', 'step': 5654, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:51.820047', 'step': 5654, 'epoch': 1} {'type': 'loss', 'content': 0.16190987825393677, 'timestamp': '2025-10-01 04:23:51.824563', 'step': 5655, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:51.877390', 'step': 5655, 'epoch': 1} {'type': 'loss', 'content': 0.1864498257637024, 'timestamp': '2025-10-01 04:23:51.883394', 'step': 5656, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:51.935729', 'step': 5656, 'epoch': 1} {'type': 'loss', 'content': 0.16669867932796478, 'timestamp': '2025-10-01 04:23:51.937771', 'step': 5657, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:51.990821', 'step': 5657, 'epoch': 1} {'type': 'loss', 'content': 0.2665303647518158, 'timestamp': '2025-10-01 04:23:51.993823', 'step': 5658, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:52.048591', 'step': 5658, 'epoch': 1} {'type': 'loss', 'content': 0.17499586939811707, 'timestamp': '2025-10-01 04:23:52.050598', 'step': 5659, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:52.104776', 'step': 5659, 'epoch': 1} {'type': 'loss', 'content': 0.10630307346582413, 'timestamp': '2025-10-01 04:23:52.110534', 'step': 5660, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:52.163270', 'step': 5660, 'epoch': 1} {'type': 'loss', 'content': 0.160536989569664, 'timestamp': '2025-10-01 04:23:52.165454', 'step': 5661, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:52.219581', 'step': 5661, 'epoch': 1} {'type': 'loss', 'content': 0.18693289160728455, 'timestamp': '2025-10-01 04:23:52.221521', 'step': 5662, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:52.275066', 'step': 5662, 'epoch': 1} {'type': 'loss', 'content': 0.1061970666050911, 'timestamp': '2025-10-01 04:23:52.277434', 'step': 5663, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:52.331141', 'step': 5663, 'epoch': 1} {'type': 'loss', 'content': 0.14928172528743744, 'timestamp': '2025-10-01 04:23:52.336914', 'step': 5664, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:52.389636', 'step': 5664, 'epoch': 1} {'type': 'loss', 'content': 0.141555055975914, 'timestamp': '2025-10-01 04:23:52.391765', 'step': 5665, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:23:52.444814', 'step': 5665, 'epoch': 1} {'type': 'loss', 'content': 0.07938043773174286, 'timestamp': '2025-10-01 04:23:52.453800', 'step': 5666, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:52.507332', 'step': 5666, 'epoch': 1} {'type': 'loss', 'content': 0.14758248627185822, 'timestamp': '2025-10-01 04:23:52.520279', 'step': 5667, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:52.579452', 'step': 5667, 'epoch': 1} {'type': 'loss', 'content': 0.1249646469950676, 'timestamp': '2025-10-01 04:23:52.585451', 'step': 5668, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:23:52.639857', 'step': 5668, 'epoch': 1} {'type': 'loss', 'content': 0.18888364732265472, 'timestamp': '2025-10-01 04:23:52.641662', 'step': 5669, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:52.696937', 'step': 5669, 'epoch': 1} {'type': 'loss', 'content': 0.10864055901765823, 'timestamp': '2025-10-01 04:23:52.698584', 'step': 5670, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:52.753663', 'step': 5670, 'epoch': 1} {'type': 'loss', 'content': 0.1344424933195114, 'timestamp': '2025-10-01 04:23:52.755344', 'step': 5671, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:52.808609', 'step': 5671, 'epoch': 1} {'type': 'loss', 'content': 0.14873388409614563, 'timestamp': '2025-10-01 04:23:52.817231', 'step': 5672, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:23:52.882779', 'step': 5672, 'epoch': 1} {'type': 'loss', 'content': 0.21842366456985474, 'timestamp': '2025-10-01 04:23:52.885440', 'step': 5673, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:52.941426', 'step': 5673, 'epoch': 1} {'type': 'loss', 'content': 0.12612321972846985, 'timestamp': '2025-10-01 04:23:52.944141', 'step': 5674, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:53.000402', 'step': 5674, 'epoch': 1} {'type': 'loss', 'content': 0.15766462683677673, 'timestamp': '2025-10-01 04:23:53.002507', 'step': 5675, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:53.059812', 'step': 5675, 'epoch': 1} {'type': 'loss', 'content': 0.1729433238506317, 'timestamp': '2025-10-01 04:23:53.065180', 'step': 5676, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:53.132608', 'step': 5676, 'epoch': 1} {'type': 'loss', 'content': 0.12737075984477997, 'timestamp': '2025-10-01 04:23:53.138780', 'step': 5677, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:53.194498', 'step': 5677, 'epoch': 1} {'type': 'loss', 'content': 0.10727816820144653, 'timestamp': '2025-10-01 04:23:53.196586', 'step': 5678, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:53.250024', 'step': 5678, 'epoch': 1} {'type': 'loss', 'content': 0.0982150137424469, 'timestamp': '2025-10-01 04:23:53.252028', 'step': 5679, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:53.311146', 'step': 5679, 'epoch': 1} {'type': 'loss', 'content': 0.12566296756267548, 'timestamp': '2025-10-01 04:23:53.317123', 'step': 5680, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:53.387282', 'step': 5680, 'epoch': 1} {'type': 'loss', 'content': 0.151383638381958, 'timestamp': '2025-10-01 04:23:53.389776', 'step': 5681, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:53.444532', 'step': 5681, 'epoch': 1} {'type': 'loss', 'content': 0.1374378502368927, 'timestamp': '2025-10-01 04:23:53.446142', 'step': 5682, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:23:53.499533', 'step': 5682, 'epoch': 1} {'type': 'loss', 'content': 0.1584920734167099, 'timestamp': '2025-10-01 04:23:53.501591', 'step': 5683, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:53.555733', 'step': 5683, 'epoch': 1} {'type': 'loss', 'content': 0.26179102063179016, 'timestamp': '2025-10-01 04:23:53.561661', 'step': 5684, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:53.614981', 'step': 5684, 'epoch': 1} {'type': 'loss', 'content': 0.1488281935453415, 'timestamp': '2025-10-01 04:23:53.622230', 'step': 5685, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:53.677923', 'step': 5685, 'epoch': 1} {'type': 'loss', 'content': 0.17686273157596588, 'timestamp': '2025-10-01 04:23:53.680812', 'step': 5686, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:53.735101', 'step': 5686, 'epoch': 1} {'type': 'loss', 'content': 0.1435709148645401, 'timestamp': '2025-10-01 04:23:53.737578', 'step': 5687, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:53.791837', 'step': 5687, 'epoch': 1} {'type': 'loss', 'content': 0.2194148302078247, 'timestamp': '2025-10-01 04:23:53.797648', 'step': 5688, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:53.850992', 'step': 5688, 'epoch': 1} {'type': 'loss', 'content': 0.1553097665309906, 'timestamp': '2025-10-01 04:23:53.853156', 'step': 5689, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:53.914255', 'step': 5689, 'epoch': 1} {'type': 'loss', 'content': 0.19480955600738525, 'timestamp': '2025-10-01 04:23:53.918960', 'step': 5690, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:53.973069', 'step': 5690, 'epoch': 1} {'type': 'loss', 'content': 0.21262027323246002, 'timestamp': '2025-10-01 04:23:53.975065', 'step': 5691, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:54.028623', 'step': 5691, 'epoch': 1} {'type': 'loss', 'content': 0.18523505330085754, 'timestamp': '2025-10-01 04:23:54.034825', 'step': 5692, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:23:54.096127', 'step': 5692, 'epoch': 1} {'type': 'loss', 'content': 0.14050130546092987, 'timestamp': '2025-10-01 04:23:54.098015', 'step': 5693, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:54.151419', 'step': 5693, 'epoch': 1} {'type': 'loss', 'content': 0.309724897146225, 'timestamp': '2025-10-01 04:23:54.153884', 'step': 5694, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:54.208861', 'step': 5694, 'epoch': 1} {'type': 'loss', 'content': 0.1638575941324234, 'timestamp': '2025-10-01 04:23:54.211455', 'step': 5695, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:54.267479', 'step': 5695, 'epoch': 1} {'type': 'loss', 'content': 0.2597557604312897, 'timestamp': '2025-10-01 04:23:54.273484', 'step': 5696, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:54.326967', 'step': 5696, 'epoch': 1} {'type': 'loss', 'content': 0.10718119889497757, 'timestamp': '2025-10-01 04:23:54.329149', 'step': 5697, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:54.383005', 'step': 5697, 'epoch': 1} {'type': 'loss', 'content': 0.18531948328018188, 'timestamp': '2025-10-01 04:23:54.385705', 'step': 5698, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:54.452445', 'step': 5698, 'epoch': 1} {'type': 'loss', 'content': 0.11454588174819946, 'timestamp': '2025-10-01 04:23:54.454939', 'step': 5699, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:54.508831', 'step': 5699, 'epoch': 1} {'type': 'loss', 'content': 0.09469753503799438, 'timestamp': '2025-10-01 04:23:54.524337', 'step': 5700, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:54.576726', 'step': 5700, 'epoch': 1} {'type': 'loss', 'content': 0.18483737111091614, 'timestamp': '2025-10-01 04:23:54.579286', 'step': 5701, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:54.632931', 'step': 5701, 'epoch': 1} {'type': 'loss', 'content': 0.10757843405008316, 'timestamp': '2025-10-01 04:23:54.634954', 'step': 5702, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:54.688379', 'step': 5702, 'epoch': 1} {'type': 'loss', 'content': 0.1777425855398178, 'timestamp': '2025-10-01 04:23:54.689996', 'step': 5703, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:54.743073', 'step': 5703, 'epoch': 1} {'type': 'loss', 'content': 0.1313214898109436, 'timestamp': '2025-10-01 04:23:54.748366', 'step': 5704, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:54.800560', 'step': 5704, 'epoch': 1} {'type': 'loss', 'content': 0.1856260448694229, 'timestamp': '2025-10-01 04:23:54.802304', 'step': 5705, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:54.865578', 'step': 5705, 'epoch': 1} {'type': 'loss', 'content': 0.143204465508461, 'timestamp': '2025-10-01 04:23:54.867832', 'step': 5706, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:23:54.921437', 'step': 5706, 'epoch': 1} {'type': 'loss', 'content': 0.1674429476261139, 'timestamp': '2025-10-01 04:23:54.923595', 'step': 5707, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:54.977576', 'step': 5707, 'epoch': 1} {'type': 'loss', 'content': 0.1304730921983719, 'timestamp': '2025-10-01 04:23:54.983563', 'step': 5708, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:55.036749', 'step': 5708, 'epoch': 1} {'type': 'loss', 'content': 0.2257383018732071, 'timestamp': '2025-10-01 04:23:55.038828', 'step': 5709, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:55.102145', 'step': 5709, 'epoch': 1} {'type': 'loss', 'content': 0.20154912769794464, 'timestamp': '2025-10-01 04:23:55.103857', 'step': 5710, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:55.157156', 'step': 5710, 'epoch': 1} {'type': 'loss', 'content': 0.15014678239822388, 'timestamp': '2025-10-01 04:23:55.158965', 'step': 5711, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:55.211794', 'step': 5711, 'epoch': 1} {'type': 'loss', 'content': 0.08329007774591446, 'timestamp': '2025-10-01 04:23:55.217728', 'step': 5712, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:55.270294', 'step': 5712, 'epoch': 1} {'type': 'loss', 'content': 0.1468033641576767, 'timestamp': '2025-10-01 04:23:55.272292', 'step': 5713, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:55.325579', 'step': 5713, 'epoch': 1} {'type': 'loss', 'content': 0.16649487614631653, 'timestamp': '2025-10-01 04:23:55.327559', 'step': 5714, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:55.380286', 'step': 5714, 'epoch': 1} {'type': 'loss', 'content': 0.16783718764781952, 'timestamp': '2025-10-01 04:23:55.382489', 'step': 5715, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:55.436557', 'step': 5715, 'epoch': 1} {'type': 'loss', 'content': 0.11838904023170471, 'timestamp': '2025-10-01 04:23:55.442454', 'step': 5716, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:55.496297', 'step': 5716, 'epoch': 1} {'type': 'loss', 'content': 0.20719408988952637, 'timestamp': '2025-10-01 04:23:55.498799', 'step': 5717, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:55.551902', 'step': 5717, 'epoch': 1} {'type': 'loss', 'content': 0.13180510699748993, 'timestamp': '2025-10-01 04:23:55.553551', 'step': 5718, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:55.607963', 'step': 5718, 'epoch': 1} {'type': 'loss', 'content': 0.10510740429162979, 'timestamp': '2025-10-01 04:23:55.609856', 'step': 5719, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:55.662437', 'step': 5719, 'epoch': 1} {'type': 'loss', 'content': 0.23342247307300568, 'timestamp': '2025-10-01 04:23:55.668694', 'step': 5720, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:55.721784', 'step': 5720, 'epoch': 1} {'type': 'loss', 'content': 0.1484495848417282, 'timestamp': '2025-10-01 04:23:55.724037', 'step': 5721, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:55.779335', 'step': 5721, 'epoch': 1} {'type': 'loss', 'content': 0.14322030544281006, 'timestamp': '2025-10-01 04:23:55.781651', 'step': 5722, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:55.834669', 'step': 5722, 'epoch': 1} {'type': 'loss', 'content': 0.1668144315481186, 'timestamp': '2025-10-01 04:23:55.837132', 'step': 5723, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:55.890644', 'step': 5723, 'epoch': 1} {'type': 'loss', 'content': 0.13271674513816833, 'timestamp': '2025-10-01 04:23:55.896950', 'step': 5724, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:55.950309', 'step': 5724, 'epoch': 1} {'type': 'loss', 'content': 0.1315801739692688, 'timestamp': '2025-10-01 04:23:55.951880', 'step': 5725, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:56.005442', 'step': 5725, 'epoch': 1} {'type': 'loss', 'content': 0.21329915523529053, 'timestamp': '2025-10-01 04:23:56.007300', 'step': 5726, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:56.060520', 'step': 5726, 'epoch': 1} {'type': 'loss', 'content': 0.1673944890499115, 'timestamp': '2025-10-01 04:23:56.063388', 'step': 5727, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:56.125562', 'step': 5727, 'epoch': 1} {'type': 'loss', 'content': 0.12488947063684464, 'timestamp': '2025-10-01 04:23:56.131440', 'step': 5728, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:56.203633', 'step': 5728, 'epoch': 1} {'type': 'loss', 'content': 0.15048934519290924, 'timestamp': '2025-10-01 04:23:56.220205', 'step': 5729, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:56.283889', 'step': 5729, 'epoch': 1} {'type': 'loss', 'content': 0.14531491696834564, 'timestamp': '2025-10-01 04:23:56.287278', 'step': 5730, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:56.343785', 'step': 5730, 'epoch': 1} {'type': 'loss', 'content': 0.07951189577579498, 'timestamp': '2025-10-01 04:23:56.347533', 'step': 5731, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:56.408976', 'step': 5731, 'epoch': 1} {'type': 'loss', 'content': 0.12610705196857452, 'timestamp': '2025-10-01 04:23:56.416733', 'step': 5732, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:56.482988', 'step': 5732, 'epoch': 1} {'type': 'loss', 'content': 0.15470841526985168, 'timestamp': '2025-10-01 04:23:56.487003', 'step': 5733, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:56.559346', 'step': 5733, 'epoch': 1} {'type': 'loss', 'content': 0.08587130159139633, 'timestamp': '2025-10-01 04:23:56.563971', 'step': 5734, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:56.634338', 'step': 5734, 'epoch': 1} {'type': 'loss', 'content': 0.14599035680294037, 'timestamp': '2025-10-01 04:23:56.647975', 'step': 5735, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:56.711764', 'step': 5735, 'epoch': 1} {'type': 'loss', 'content': 0.14257197082042694, 'timestamp': '2025-10-01 04:23:56.727981', 'step': 5736, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:56.792575', 'step': 5736, 'epoch': 1} {'type': 'loss', 'content': 0.15075449645519257, 'timestamp': '2025-10-01 04:23:56.795174', 'step': 5737, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:56.863392', 'step': 5737, 'epoch': 1} {'type': 'loss', 'content': 0.09589872509241104, 'timestamp': '2025-10-01 04:23:56.865597', 'step': 5738, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:56.938449', 'step': 5738, 'epoch': 1} {'type': 'loss', 'content': 0.17146867513656616, 'timestamp': '2025-10-01 04:23:56.942805', 'step': 5739, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:57.009129', 'step': 5739, 'epoch': 1} {'type': 'loss', 'content': 0.16868630051612854, 'timestamp': '2025-10-01 04:23:57.021809', 'step': 5740, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:57.091018', 'step': 5740, 'epoch': 1} {'type': 'loss', 'content': 0.11324778944253922, 'timestamp': '2025-10-01 04:23:57.098446', 'step': 5741, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:23:57.174485', 'step': 5741, 'epoch': 1} {'type': 'loss', 'content': 0.15255536139011383, 'timestamp': '2025-10-01 04:23:57.187449', 'step': 5742, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:57.243056', 'step': 5742, 'epoch': 1} {'type': 'loss', 'content': 0.1356297731399536, 'timestamp': '2025-10-01 04:23:57.246137', 'step': 5743, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:57.309881', 'step': 5743, 'epoch': 1} {'type': 'loss', 'content': 0.1158236712217331, 'timestamp': '2025-10-01 04:23:57.315808', 'step': 5744, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:57.368575', 'step': 5744, 'epoch': 1} {'type': 'loss', 'content': 0.11410068720579147, 'timestamp': '2025-10-01 04:23:57.370719', 'step': 5745, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:57.423821', 'step': 5745, 'epoch': 1} {'type': 'loss', 'content': 0.2042624056339264, 'timestamp': '2025-10-01 04:23:57.427901', 'step': 5746, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:57.488664', 'step': 5746, 'epoch': 1} {'type': 'loss', 'content': 0.17467929422855377, 'timestamp': '2025-10-01 04:23:57.490926', 'step': 5747, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:57.547140', 'step': 5747, 'epoch': 1} {'type': 'loss', 'content': 0.17887428402900696, 'timestamp': '2025-10-01 04:23:57.553132', 'step': 5748, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:57.605858', 'step': 5748, 'epoch': 1} {'type': 'loss', 'content': 0.21553979814052582, 'timestamp': '2025-10-01 04:23:57.607845', 'step': 5749, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:57.661065', 'step': 5749, 'epoch': 1} {'type': 'loss', 'content': 0.15430636703968048, 'timestamp': '2025-10-01 04:23:57.663231', 'step': 5750, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:57.716303', 'step': 5750, 'epoch': 1} {'type': 'loss', 'content': 0.14681152999401093, 'timestamp': '2025-10-01 04:23:57.718252', 'step': 5751, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:57.771425', 'step': 5751, 'epoch': 1} {'type': 'loss', 'content': 0.10868042707443237, 'timestamp': '2025-10-01 04:23:57.777143', 'step': 5752, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:57.830073', 'step': 5752, 'epoch': 1} {'type': 'loss', 'content': 0.07698944956064224, 'timestamp': '2025-10-01 04:23:57.832819', 'step': 5753, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:57.885550', 'step': 5753, 'epoch': 1} {'type': 'loss', 'content': 0.09517112374305725, 'timestamp': '2025-10-01 04:23:57.890098', 'step': 5754, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:57.946473', 'step': 5754, 'epoch': 1} {'type': 'loss', 'content': 0.11255937069654465, 'timestamp': '2025-10-01 04:23:57.948513', 'step': 5755, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:58.001922', 'step': 5755, 'epoch': 1} {'type': 'loss', 'content': 0.0994538739323616, 'timestamp': '2025-10-01 04:23:58.008033', 'step': 5756, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:58.060680', 'step': 5756, 'epoch': 1} {'type': 'loss', 'content': 0.1652856022119522, 'timestamp': '2025-10-01 04:23:58.064619', 'step': 5757, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:58.117640', 'step': 5757, 'epoch': 1} {'type': 'loss', 'content': 0.17422764003276825, 'timestamp': '2025-10-01 04:23:58.120271', 'step': 5758, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:58.174032', 'step': 5758, 'epoch': 1} {'type': 'loss', 'content': 0.11880755424499512, 'timestamp': '2025-10-01 04:23:58.176222', 'step': 5759, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:58.231272', 'step': 5759, 'epoch': 1} {'type': 'loss', 'content': 0.11315743625164032, 'timestamp': '2025-10-01 04:23:58.236955', 'step': 5760, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:58.289684', 'step': 5760, 'epoch': 1} {'type': 'loss', 'content': 0.1669766753911972, 'timestamp': '2025-10-01 04:23:58.291889', 'step': 5761, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:58.345342', 'step': 5761, 'epoch': 1} {'type': 'loss', 'content': 0.1575550138950348, 'timestamp': '2025-10-01 04:23:58.347522', 'step': 5762, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:58.401655', 'step': 5762, 'epoch': 1} {'type': 'loss', 'content': 0.07763887941837311, 'timestamp': '2025-10-01 04:23:58.403929', 'step': 5763, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:58.456797', 'step': 5763, 'epoch': 1} {'type': 'loss', 'content': 0.12019997090101242, 'timestamp': '2025-10-01 04:23:58.462527', 'step': 5764, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:58.515721', 'step': 5764, 'epoch': 1} {'type': 'loss', 'content': 0.3478417992591858, 'timestamp': '2025-10-01 04:23:58.517694', 'step': 5765, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:23:58.570532', 'step': 5765, 'epoch': 1} {'type': 'loss', 'content': 0.18337170779705048, 'timestamp': '2025-10-01 04:23:58.572564', 'step': 5766, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:58.625423', 'step': 5766, 'epoch': 1} {'type': 'loss', 'content': 0.18043479323387146, 'timestamp': '2025-10-01 04:23:58.627448', 'step': 5767, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:58.681214', 'step': 5767, 'epoch': 1} {'type': 'loss', 'content': 0.17049767076969147, 'timestamp': '2025-10-01 04:23:58.686872', 'step': 5768, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:58.739569', 'step': 5768, 'epoch': 1} {'type': 'loss', 'content': 0.15997007489204407, 'timestamp': '2025-10-01 04:23:58.741533', 'step': 5769, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:23:58.794333', 'step': 5769, 'epoch': 1} {'type': 'loss', 'content': 0.18321818113327026, 'timestamp': '2025-10-01 04:23:58.796388', 'step': 5770, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:58.849491', 'step': 5770, 'epoch': 1} {'type': 'loss', 'content': 0.21112361550331116, 'timestamp': '2025-10-01 04:23:58.851860', 'step': 5771, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:58.908189', 'step': 5771, 'epoch': 1} {'type': 'loss', 'content': 0.2900536358356476, 'timestamp': '2025-10-01 04:23:58.913959', 'step': 5772, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:58.966887', 'step': 5772, 'epoch': 1} {'type': 'loss', 'content': 0.2230295091867447, 'timestamp': '2025-10-01 04:23:58.968921', 'step': 5773, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:59.022259', 'step': 5773, 'epoch': 1} {'type': 'loss', 'content': 0.1765931397676468, 'timestamp': '2025-10-01 04:23:59.024287', 'step': 5774, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:59.077940', 'step': 5774, 'epoch': 1} {'type': 'loss', 'content': 0.19247551262378693, 'timestamp': '2025-10-01 04:23:59.079819', 'step': 5775, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:59.133461', 'step': 5775, 'epoch': 1} {'type': 'loss', 'content': 0.12946131825447083, 'timestamp': '2025-10-01 04:23:59.139000', 'step': 5776, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:59.191578', 'step': 5776, 'epoch': 1} {'type': 'loss', 'content': 0.1069272980093956, 'timestamp': '2025-10-01 04:23:59.193777', 'step': 5777, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:59.246844', 'step': 5777, 'epoch': 1} {'type': 'loss', 'content': 0.14847800135612488, 'timestamp': '2025-10-01 04:23:59.249022', 'step': 5778, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:59.302673', 'step': 5778, 'epoch': 1} {'type': 'loss', 'content': 0.20738457143306732, 'timestamp': '2025-10-01 04:23:59.304916', 'step': 5779, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:23:59.358551', 'step': 5779, 'epoch': 1} {'type': 'loss', 'content': 0.1869354248046875, 'timestamp': '2025-10-01 04:23:59.366799', 'step': 5780, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:59.419748', 'step': 5780, 'epoch': 1} {'type': 'loss', 'content': 0.1628493368625641, 'timestamp': '2025-10-01 04:23:59.421856', 'step': 5781, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:59.474353', 'step': 5781, 'epoch': 1} {'type': 'loss', 'content': 0.10062933713197708, 'timestamp': '2025-10-01 04:23:59.486931', 'step': 5782, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:59.539910', 'step': 5782, 'epoch': 1} {'type': 'loss', 'content': 0.2071181684732437, 'timestamp': '2025-10-01 04:23:59.541933', 'step': 5783, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:59.594070', 'step': 5783, 'epoch': 1} {'type': 'loss', 'content': 0.16625261306762695, 'timestamp': '2025-10-01 04:23:59.599981', 'step': 5784, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:23:59.654034', 'step': 5784, 'epoch': 1} {'type': 'loss', 'content': 0.1665862798690796, 'timestamp': '2025-10-01 04:23:59.656099', 'step': 5785, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:23:59.711353', 'step': 5785, 'epoch': 1} {'type': 'loss', 'content': 0.21750716865062714, 'timestamp': '2025-10-01 04:23:59.713417', 'step': 5786, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:59.766875', 'step': 5786, 'epoch': 1} {'type': 'loss', 'content': 0.21541859209537506, 'timestamp': '2025-10-01 04:23:59.769184', 'step': 5787, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:59.821959', 'step': 5787, 'epoch': 1} {'type': 'loss', 'content': 0.09854215383529663, 'timestamp': '2025-10-01 04:23:59.827597', 'step': 5788, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:59.879837', 'step': 5788, 'epoch': 1} {'type': 'loss', 'content': 0.17944568395614624, 'timestamp': '2025-10-01 04:23:59.881837', 'step': 5789, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:59.935346', 'step': 5789, 'epoch': 1} {'type': 'loss', 'content': 0.20732277631759644, 'timestamp': '2025-10-01 04:23:59.938250', 'step': 5790, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:23:59.992104', 'step': 5790, 'epoch': 1} {'type': 'loss', 'content': 0.15350522100925446, 'timestamp': '2025-10-01 04:23:59.994408', 'step': 5791, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:00.047447', 'step': 5791, 'epoch': 1} {'type': 'loss', 'content': 0.13368545472621918, 'timestamp': '2025-10-01 04:24:00.053056', 'step': 5792, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:00.121448', 'step': 5792, 'epoch': 1} {'type': 'loss', 'content': 0.1825980246067047, 'timestamp': '2025-10-01 04:24:00.123497', 'step': 5793, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:00.177406', 'step': 5793, 'epoch': 1} {'type': 'loss', 'content': 0.1905747652053833, 'timestamp': '2025-10-01 04:24:00.179744', 'step': 5794, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:00.251828', 'step': 5794, 'epoch': 1} {'type': 'loss', 'content': 0.17637549340724945, 'timestamp': '2025-10-01 04:24:00.254209', 'step': 5795, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:00.306952', 'step': 5795, 'epoch': 1} {'type': 'loss', 'content': 0.21939857304096222, 'timestamp': '2025-10-01 04:24:00.312990', 'step': 5796, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:00.373844', 'step': 5796, 'epoch': 1} {'type': 'loss', 'content': 0.16981267929077148, 'timestamp': '2025-10-01 04:24:00.380311', 'step': 5797, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:00.438017', 'step': 5797, 'epoch': 1} {'type': 'loss', 'content': 0.0825808197259903, 'timestamp': '2025-10-01 04:24:00.440403', 'step': 5798, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:00.495116', 'step': 5798, 'epoch': 1} {'type': 'loss', 'content': 0.12955085933208466, 'timestamp': '2025-10-01 04:24:00.498326', 'step': 5799, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:00.552012', 'step': 5799, 'epoch': 1} {'type': 'loss', 'content': 0.11878769844770432, 'timestamp': '2025-10-01 04:24:00.557909', 'step': 5800, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:00.614180', 'step': 5800, 'epoch': 1} {'type': 'loss', 'content': 0.2632479667663574, 'timestamp': '2025-10-01 04:24:00.617428', 'step': 5801, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:00.671786', 'step': 5801, 'epoch': 1} {'type': 'loss', 'content': 0.21031568944454193, 'timestamp': '2025-10-01 04:24:00.674873', 'step': 5802, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:00.730690', 'step': 5802, 'epoch': 1} {'type': 'loss', 'content': 0.0574289932847023, 'timestamp': '2025-10-01 04:24:00.732847', 'step': 5803, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:00.798486', 'step': 5803, 'epoch': 1} {'type': 'loss', 'content': 0.13054893910884857, 'timestamp': '2025-10-01 04:24:00.804301', 'step': 5804, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:00.857856', 'step': 5804, 'epoch': 1} {'type': 'loss', 'content': 0.10747314989566803, 'timestamp': '2025-10-01 04:24:00.860583', 'step': 5805, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:00.922791', 'step': 5805, 'epoch': 1} {'type': 'loss', 'content': 0.2025132030248642, 'timestamp': '2025-10-01 04:24:00.925319', 'step': 5806, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:00.978908', 'step': 5806, 'epoch': 1} {'type': 'loss', 'content': 0.13725382089614868, 'timestamp': '2025-10-01 04:24:00.981064', 'step': 5807, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:01.035020', 'step': 5807, 'epoch': 1} {'type': 'loss', 'content': 0.19025224447250366, 'timestamp': '2025-10-01 04:24:01.044782', 'step': 5808, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:01.098570', 'step': 5808, 'epoch': 1} {'type': 'loss', 'content': 0.14149419963359833, 'timestamp': '2025-10-01 04:24:01.101138', 'step': 5809, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:01.157104', 'step': 5809, 'epoch': 1} {'type': 'loss', 'content': 0.15040773153305054, 'timestamp': '2025-10-01 04:24:01.159161', 'step': 5810, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:01.224781', 'step': 5810, 'epoch': 1} {'type': 'loss', 'content': 0.07196430116891861, 'timestamp': '2025-10-01 04:24:01.226980', 'step': 5811, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:24:01.281840', 'step': 5811, 'epoch': 1} {'type': 'loss', 'content': 0.1013018861413002, 'timestamp': '2025-10-01 04:24:01.288148', 'step': 5812, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:01.360691', 'step': 5812, 'epoch': 1} {'type': 'loss', 'content': 0.18230006098747253, 'timestamp': '2025-10-01 04:24:01.377030', 'step': 5813, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:01.450781', 'step': 5813, 'epoch': 1} {'type': 'loss', 'content': 0.14929291605949402, 'timestamp': '2025-10-01 04:24:01.453080', 'step': 5814, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:01.508457', 'step': 5814, 'epoch': 1} {'type': 'loss', 'content': 0.15276572108268738, 'timestamp': '2025-10-01 04:24:01.510830', 'step': 5815, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:01.565488', 'step': 5815, 'epoch': 1} {'type': 'loss', 'content': 0.17145155370235443, 'timestamp': '2025-10-01 04:24:01.571814', 'step': 5816, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:01.627839', 'step': 5816, 'epoch': 1} {'type': 'loss', 'content': 0.13483792543411255, 'timestamp': '2025-10-01 04:24:01.629969', 'step': 5817, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:01.684361', 'step': 5817, 'epoch': 1} {'type': 'loss', 'content': 0.16104736924171448, 'timestamp': '2025-10-01 04:24:01.686418', 'step': 5818, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:01.740594', 'step': 5818, 'epoch': 1} {'type': 'loss', 'content': 0.07118963450193405, 'timestamp': '2025-10-01 04:24:01.742774', 'step': 5819, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:01.796488', 'step': 5819, 'epoch': 1} {'type': 'loss', 'content': 0.18618378043174744, 'timestamp': '2025-10-01 04:24:01.803762', 'step': 5820, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:01.857672', 'step': 5820, 'epoch': 1} {'type': 'loss', 'content': 0.13943640887737274, 'timestamp': '2025-10-01 04:24:01.859785', 'step': 5821, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:01.913467', 'step': 5821, 'epoch': 1} {'type': 'loss', 'content': 0.22075209021568298, 'timestamp': '2025-10-01 04:24:01.915685', 'step': 5822, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:01.970718', 'step': 5822, 'epoch': 1} {'type': 'loss', 'content': 0.13302701711654663, 'timestamp': '2025-10-01 04:24:01.972863', 'step': 5823, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:02.036809', 'step': 5823, 'epoch': 1} {'type': 'loss', 'content': 0.15853308141231537, 'timestamp': '2025-10-01 04:24:02.043072', 'step': 5824, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:02.096981', 'step': 5824, 'epoch': 1} {'type': 'loss', 'content': 0.1262318342924118, 'timestamp': '2025-10-01 04:24:02.099423', 'step': 5825, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:02.154119', 'step': 5825, 'epoch': 1} {'type': 'loss', 'content': 0.1915602833032608, 'timestamp': '2025-10-01 04:24:02.156009', 'step': 5826, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:02.222467', 'step': 5826, 'epoch': 1} {'type': 'loss', 'content': 0.15173763036727905, 'timestamp': '2025-10-01 04:24:02.224988', 'step': 5827, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:02.291680', 'step': 5827, 'epoch': 1} {'type': 'loss', 'content': 0.10101018846035004, 'timestamp': '2025-10-01 04:24:02.298837', 'step': 5828, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:02.352459', 'step': 5828, 'epoch': 1} {'type': 'loss', 'content': 0.17205996811389923, 'timestamp': '2025-10-01 04:24:02.354758', 'step': 5829, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:02.410267', 'step': 5829, 'epoch': 1} {'type': 'loss', 'content': 0.1361391395330429, 'timestamp': '2025-10-01 04:24:02.412373', 'step': 5830, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:02.466595', 'step': 5830, 'epoch': 1} {'type': 'loss', 'content': 0.207972452044487, 'timestamp': '2025-10-01 04:24:02.468712', 'step': 5831, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:02.522403', 'step': 5831, 'epoch': 1} {'type': 'loss', 'content': 0.1331128031015396, 'timestamp': '2025-10-01 04:24:02.528328', 'step': 5832, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:24:02.581398', 'step': 5832, 'epoch': 1} {'type': 'loss', 'content': 0.2099534422159195, 'timestamp': '2025-10-01 04:24:02.583368', 'step': 5833, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:02.637271', 'step': 5833, 'epoch': 1} {'type': 'loss', 'content': 0.1462138295173645, 'timestamp': '2025-10-01 04:24:02.639505', 'step': 5834, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:02.692826', 'step': 5834, 'epoch': 1} {'type': 'loss', 'content': 0.07022599875926971, 'timestamp': '2025-10-01 04:24:02.695128', 'step': 5835, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:02.748609', 'step': 5835, 'epoch': 1} {'type': 'loss', 'content': 0.18273817002773285, 'timestamp': '2025-10-01 04:24:02.754888', 'step': 5836, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:02.809368', 'step': 5836, 'epoch': 1} {'type': 'loss', 'content': 0.11477141082286835, 'timestamp': '2025-10-01 04:24:02.811841', 'step': 5837, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:02.866020', 'step': 5837, 'epoch': 1} {'type': 'loss', 'content': 0.14989838004112244, 'timestamp': '2025-10-01 04:24:02.868809', 'step': 5838, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:02.923241', 'step': 5838, 'epoch': 1} {'type': 'loss', 'content': 0.16497185826301575, 'timestamp': '2025-10-01 04:24:02.925750', 'step': 5839, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:24:02.985167', 'step': 5839, 'epoch': 1} {'type': 'loss', 'content': 0.1647779494524002, 'timestamp': '2025-10-01 04:24:02.992332', 'step': 5840, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:03.046619', 'step': 5840, 'epoch': 1} {'type': 'loss', 'content': 0.12325021624565125, 'timestamp': '2025-10-01 04:24:03.049096', 'step': 5841, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:03.105214', 'step': 5841, 'epoch': 1} {'type': 'loss', 'content': 0.1674194037914276, 'timestamp': '2025-10-01 04:24:03.109086', 'step': 5842, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:03.165098', 'step': 5842, 'epoch': 1} {'type': 'loss', 'content': 0.10726455599069595, 'timestamp': '2025-10-01 04:24:03.167626', 'step': 5843, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:03.222035', 'step': 5843, 'epoch': 1} {'type': 'loss', 'content': 0.13792753219604492, 'timestamp': '2025-10-01 04:24:03.238636', 'step': 5844, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:03.292876', 'step': 5844, 'epoch': 1} {'type': 'loss', 'content': 0.1371399611234665, 'timestamp': '2025-10-01 04:24:03.295128', 'step': 5845, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:24:03.349795', 'step': 5845, 'epoch': 1} {'type': 'loss', 'content': 0.17834362387657166, 'timestamp': '2025-10-01 04:24:03.352016', 'step': 5846, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:24:03.411134', 'step': 5846, 'epoch': 1} {'type': 'loss', 'content': 0.25562766194343567, 'timestamp': '2025-10-01 04:24:03.413703', 'step': 5847, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:03.469594', 'step': 5847, 'epoch': 1} {'type': 'loss', 'content': 0.17857874929904938, 'timestamp': '2025-10-01 04:24:03.476114', 'step': 5848, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:03.531155', 'step': 5848, 'epoch': 1} {'type': 'loss', 'content': 0.1822078973054886, 'timestamp': '2025-10-01 04:24:03.533689', 'step': 5849, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:03.590989', 'step': 5849, 'epoch': 1} {'type': 'loss', 'content': 0.235284686088562, 'timestamp': '2025-10-01 04:24:03.593980', 'step': 5850, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:03.649842', 'step': 5850, 'epoch': 1} {'type': 'loss', 'content': 0.2081083208322525, 'timestamp': '2025-10-01 04:24:03.653091', 'step': 5851, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:03.707814', 'step': 5851, 'epoch': 1} {'type': 'loss', 'content': 0.13931041955947876, 'timestamp': '2025-10-01 04:24:03.715075', 'step': 5852, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:03.769775', 'step': 5852, 'epoch': 1} {'type': 'loss', 'content': 0.18058046698570251, 'timestamp': '2025-10-01 04:24:03.772323', 'step': 5853, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:03.827435', 'step': 5853, 'epoch': 1} {'type': 'loss', 'content': 0.121846504509449, 'timestamp': '2025-10-01 04:24:03.830009', 'step': 5854, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:03.886342', 'step': 5854, 'epoch': 1} {'type': 'loss', 'content': 0.1974133849143982, 'timestamp': '2025-10-01 04:24:03.891930', 'step': 5855, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:03.949393', 'step': 5855, 'epoch': 1} {'type': 'loss', 'content': 0.16868342459201813, 'timestamp': '2025-10-01 04:24:03.958696', 'step': 5856, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:04.024240', 'step': 5856, 'epoch': 1} {'type': 'loss', 'content': 0.24985700845718384, 'timestamp': '2025-10-01 04:24:04.026656', 'step': 5857, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:04.081196', 'step': 5857, 'epoch': 1} {'type': 'loss', 'content': 0.1580408811569214, 'timestamp': '2025-10-01 04:24:04.084044', 'step': 5858, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:04.138447', 'step': 5858, 'epoch': 1} {'type': 'loss', 'content': 0.18421071767807007, 'timestamp': '2025-10-01 04:24:04.140449', 'step': 5859, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:04.194185', 'step': 5859, 'epoch': 1} {'type': 'loss', 'content': 0.2438047081232071, 'timestamp': '2025-10-01 04:24:04.200592', 'step': 5860, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:04.254219', 'step': 5860, 'epoch': 1} {'type': 'loss', 'content': 0.15607190132141113, 'timestamp': '2025-10-01 04:24:04.256687', 'step': 5861, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:04.326964', 'step': 5861, 'epoch': 1} {'type': 'loss', 'content': 0.18250688910484314, 'timestamp': '2025-10-01 04:24:04.329272', 'step': 5862, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:04.383068', 'step': 5862, 'epoch': 1} {'type': 'loss', 'content': 0.1488523781299591, 'timestamp': '2025-10-01 04:24:04.385597', 'step': 5863, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:04.440013', 'step': 5863, 'epoch': 1} {'type': 'loss', 'content': 0.10687924921512604, 'timestamp': '2025-10-01 04:24:04.446404', 'step': 5864, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:04.504274', 'step': 5864, 'epoch': 1} {'type': 'loss', 'content': 0.13477498292922974, 'timestamp': '2025-10-01 04:24:04.506401', 'step': 5865, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:04.559546', 'step': 5865, 'epoch': 1} {'type': 'loss', 'content': 0.1752328872680664, 'timestamp': '2025-10-01 04:24:04.561623', 'step': 5866, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:04.614428', 'step': 5866, 'epoch': 1} {'type': 'loss', 'content': 0.18699409067630768, 'timestamp': '2025-10-01 04:24:04.616556', 'step': 5867, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:04.670181', 'step': 5867, 'epoch': 1} {'type': 'loss', 'content': 0.14823944866657257, 'timestamp': '2025-10-01 04:24:04.675905', 'step': 5868, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:04.729981', 'step': 5868, 'epoch': 1} {'type': 'loss', 'content': 0.15413014590740204, 'timestamp': '2025-10-01 04:24:04.732143', 'step': 5869, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:04.787313', 'step': 5869, 'epoch': 1} {'type': 'loss', 'content': 0.18835945427417755, 'timestamp': '2025-10-01 04:24:04.789449', 'step': 5870, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:04.843269', 'step': 5870, 'epoch': 1} {'type': 'loss', 'content': 0.13627350330352783, 'timestamp': '2025-10-01 04:24:04.847214', 'step': 5871, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:04.900935', 'step': 5871, 'epoch': 1} {'type': 'loss', 'content': 0.0869082659482956, 'timestamp': '2025-10-01 04:24:04.906934', 'step': 5872, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:04.961129', 'step': 5872, 'epoch': 1} {'type': 'loss', 'content': 0.16614589095115662, 'timestamp': '2025-10-01 04:24:04.963853', 'step': 5873, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:05.016829', 'step': 5873, 'epoch': 1} {'type': 'loss', 'content': 0.08894510567188263, 'timestamp': '2025-10-01 04:24:05.019117', 'step': 5874, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:05.073263', 'step': 5874, 'epoch': 1} {'type': 'loss', 'content': 0.1692303568124771, 'timestamp': '2025-10-01 04:24:05.075380', 'step': 5875, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:05.135748', 'step': 5875, 'epoch': 1} {'type': 'loss', 'content': 0.1684151589870453, 'timestamp': '2025-10-01 04:24:05.143085', 'step': 5876, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:05.207220', 'step': 5876, 'epoch': 1} {'type': 'loss', 'content': 0.11228866875171661, 'timestamp': '2025-10-01 04:24:05.210553', 'step': 5877, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:05.266503', 'step': 5877, 'epoch': 1} {'type': 'loss', 'content': 0.09408970922231674, 'timestamp': '2025-10-01 04:24:05.268665', 'step': 5878, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:05.341787', 'step': 5878, 'epoch': 1} {'type': 'loss', 'content': 0.16070318222045898, 'timestamp': '2025-10-01 04:24:05.343900', 'step': 5879, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:05.398642', 'step': 5879, 'epoch': 1} {'type': 'loss', 'content': 0.17213544249534607, 'timestamp': '2025-10-01 04:24:05.404330', 'step': 5880, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:05.457349', 'step': 5880, 'epoch': 1} {'type': 'loss', 'content': 0.2231844812631607, 'timestamp': '2025-10-01 04:24:05.459430', 'step': 5881, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:05.513629', 'step': 5881, 'epoch': 1} {'type': 'loss', 'content': 0.16815407574176788, 'timestamp': '2025-10-01 04:24:05.515585', 'step': 5882, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:24:05.569143', 'step': 5882, 'epoch': 1} {'type': 'loss', 'content': 0.1302986592054367, 'timestamp': '2025-10-01 04:24:05.571333', 'step': 5883, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:05.623936', 'step': 5883, 'epoch': 1} {'type': 'loss', 'content': 0.21740618348121643, 'timestamp': '2025-10-01 04:24:05.629470', 'step': 5884, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:05.682999', 'step': 5884, 'epoch': 1} {'type': 'loss', 'content': 0.05427180975675583, 'timestamp': '2025-10-01 04:24:05.685231', 'step': 5885, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:05.738813', 'step': 5885, 'epoch': 1} {'type': 'loss', 'content': 0.16851164400577545, 'timestamp': '2025-10-01 04:24:05.743058', 'step': 5886, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:05.803000', 'step': 5886, 'epoch': 1} {'type': 'loss', 'content': 0.11955293267965317, 'timestamp': '2025-10-01 04:24:05.804994', 'step': 5887, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:05.858225', 'step': 5887, 'epoch': 1} {'type': 'loss', 'content': 0.1600433588027954, 'timestamp': '2025-10-01 04:24:05.864339', 'step': 5888, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:05.917794', 'step': 5888, 'epoch': 1} {'type': 'loss', 'content': 0.2899353504180908, 'timestamp': '2025-10-01 04:24:05.930031', 'step': 5889, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:05.983436', 'step': 5889, 'epoch': 1} {'type': 'loss', 'content': 0.13139677047729492, 'timestamp': '2025-10-01 04:24:05.985633', 'step': 5890, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:06.038770', 'step': 5890, 'epoch': 1} {'type': 'loss', 'content': 0.18788565695285797, 'timestamp': '2025-10-01 04:24:06.046526', 'step': 5891, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:06.106762', 'step': 5891, 'epoch': 1} {'type': 'loss', 'content': 0.10680879652500153, 'timestamp': '2025-10-01 04:24:06.112388', 'step': 5892, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:06.165179', 'step': 5892, 'epoch': 1} {'type': 'loss', 'content': 0.101197250187397, 'timestamp': '2025-10-01 04:24:06.167097', 'step': 5893, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:06.219708', 'step': 5893, 'epoch': 1} {'type': 'loss', 'content': 0.15723641216754913, 'timestamp': '2025-10-01 04:24:06.221774', 'step': 5894, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:06.274935', 'step': 5894, 'epoch': 1} {'type': 'loss', 'content': 0.22603042423725128, 'timestamp': '2025-10-01 04:24:06.282642', 'step': 5895, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:24:06.343150', 'step': 5895, 'epoch': 1} {'type': 'loss', 'content': 0.17526541650295258, 'timestamp': '2025-10-01 04:24:06.349399', 'step': 5896, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:06.402941', 'step': 5896, 'epoch': 1} {'type': 'loss', 'content': 0.21553093194961548, 'timestamp': '2025-10-01 04:24:06.405609', 'step': 5897, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:06.462669', 'step': 5897, 'epoch': 1} {'type': 'loss', 'content': 0.1445162296295166, 'timestamp': '2025-10-01 04:24:06.465878', 'step': 5898, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:24:06.524337', 'step': 5898, 'epoch': 1} {'type': 'loss', 'content': 0.0983489379286766, 'timestamp': '2025-10-01 04:24:06.526419', 'step': 5899, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:06.579676', 'step': 5899, 'epoch': 1} {'type': 'loss', 'content': 0.2048056274652481, 'timestamp': '2025-10-01 04:24:06.585415', 'step': 5900, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:06.638095', 'step': 5900, 'epoch': 1} {'type': 'loss', 'content': 0.14084558188915253, 'timestamp': '2025-10-01 04:24:06.640079', 'step': 5901, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:06.692213', 'step': 5901, 'epoch': 1} {'type': 'loss', 'content': 0.1567782312631607, 'timestamp': '2025-10-01 04:24:06.694661', 'step': 5902, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:06.747771', 'step': 5902, 'epoch': 1} {'type': 'loss', 'content': 0.10432150959968567, 'timestamp': '2025-10-01 04:24:06.749945', 'step': 5903, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:24:06.803783', 'step': 5903, 'epoch': 1} {'type': 'loss', 'content': 0.29391008615493774, 'timestamp': '2025-10-01 04:24:06.809234', 'step': 5904, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:06.873116', 'step': 5904, 'epoch': 1} {'type': 'loss', 'content': 0.17026278376579285, 'timestamp': '2025-10-01 04:24:06.875296', 'step': 5905, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:06.929904', 'step': 5905, 'epoch': 1} {'type': 'loss', 'content': 0.20958201587200165, 'timestamp': '2025-10-01 04:24:06.932060', 'step': 5906, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:06.986108', 'step': 5906, 'epoch': 1} {'type': 'loss', 'content': 0.17693816125392914, 'timestamp': '2025-10-01 04:24:06.987990', 'step': 5907, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:07.040705', 'step': 5907, 'epoch': 1} {'type': 'loss', 'content': 0.149473637342453, 'timestamp': '2025-10-01 04:24:07.046163', 'step': 5908, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:07.099667', 'step': 5908, 'epoch': 1} {'type': 'loss', 'content': 0.13722462952136993, 'timestamp': '2025-10-01 04:24:07.101833', 'step': 5909, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:07.154979', 'step': 5909, 'epoch': 1} {'type': 'loss', 'content': 0.17758536338806152, 'timestamp': '2025-10-01 04:24:07.157008', 'step': 5910, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:07.212166', 'step': 5910, 'epoch': 1} {'type': 'loss', 'content': 0.1447710245847702, 'timestamp': '2025-10-01 04:24:07.214244', 'step': 5911, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:07.269210', 'step': 5911, 'epoch': 1} {'type': 'loss', 'content': 0.12992030382156372, 'timestamp': '2025-10-01 04:24:07.275290', 'step': 5912, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:07.328718', 'step': 5912, 'epoch': 1} {'type': 'loss', 'content': 0.13112154603004456, 'timestamp': '2025-10-01 04:24:07.330800', 'step': 5913, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:07.384662', 'step': 5913, 'epoch': 1} {'type': 'loss', 'content': 0.30085283517837524, 'timestamp': '2025-10-01 04:24:07.386909', 'step': 5914, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:07.441084', 'step': 5914, 'epoch': 1} {'type': 'loss', 'content': 0.23982763290405273, 'timestamp': '2025-10-01 04:24:07.443478', 'step': 5915, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:07.499113', 'step': 5915, 'epoch': 1} {'type': 'loss', 'content': 0.14752325415611267, 'timestamp': '2025-10-01 04:24:07.505052', 'step': 5916, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:07.558662', 'step': 5916, 'epoch': 1} {'type': 'loss', 'content': 0.18107497692108154, 'timestamp': '2025-10-01 04:24:07.560931', 'step': 5917, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:07.614160', 'step': 5917, 'epoch': 1} {'type': 'loss', 'content': 0.12179233878850937, 'timestamp': '2025-10-01 04:24:07.616277', 'step': 5918, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:07.670027', 'step': 5918, 'epoch': 1} {'type': 'loss', 'content': 0.19277580082416534, 'timestamp': '2025-10-01 04:24:07.672112', 'step': 5919, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:07.725122', 'step': 5919, 'epoch': 1} {'type': 'loss', 'content': 0.185132697224617, 'timestamp': '2025-10-01 04:24:07.731246', 'step': 5920, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:07.791825', 'step': 5920, 'epoch': 1} {'type': 'loss', 'content': 0.13020217418670654, 'timestamp': '2025-10-01 04:24:07.794030', 'step': 5921, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:24:07.847569', 'step': 5921, 'epoch': 1} {'type': 'loss', 'content': 0.2254432737827301, 'timestamp': '2025-10-01 04:24:07.849589', 'step': 5922, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:07.903161', 'step': 5922, 'epoch': 1} {'type': 'loss', 'content': 0.22411181032657623, 'timestamp': '2025-10-01 04:24:07.904975', 'step': 5923, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:07.958223', 'step': 5923, 'epoch': 1} {'type': 'loss', 'content': 0.15425221621990204, 'timestamp': '2025-10-01 04:24:07.964531', 'step': 5924, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:08.017802', 'step': 5924, 'epoch': 1} {'type': 'loss', 'content': 0.09186332672834396, 'timestamp': '2025-10-01 04:24:08.019785', 'step': 5925, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:08.071735', 'step': 5925, 'epoch': 1} {'type': 'loss', 'content': 0.14028102159500122, 'timestamp': '2025-10-01 04:24:08.073947', 'step': 5926, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:08.126950', 'step': 5926, 'epoch': 1} {'type': 'loss', 'content': 0.1044771820306778, 'timestamp': '2025-10-01 04:24:08.128989', 'step': 5927, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:08.181701', 'step': 5927, 'epoch': 1} {'type': 'loss', 'content': 0.08419609069824219, 'timestamp': '2025-10-01 04:24:08.187678', 'step': 5928, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:08.242911', 'step': 5928, 'epoch': 1} {'type': 'loss', 'content': 0.09552551805973053, 'timestamp': '2025-10-01 04:24:08.245108', 'step': 5929, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:08.298677', 'step': 5929, 'epoch': 1} {'type': 'loss', 'content': 0.07426464557647705, 'timestamp': '2025-10-01 04:24:08.300836', 'step': 5930, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:08.354683', 'step': 5930, 'epoch': 1} {'type': 'loss', 'content': 0.18482407927513123, 'timestamp': '2025-10-01 04:24:08.356888', 'step': 5931, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:08.410927', 'step': 5931, 'epoch': 1} {'type': 'loss', 'content': 0.17263753712177277, 'timestamp': '2025-10-01 04:24:08.417174', 'step': 5932, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:08.470527', 'step': 5932, 'epoch': 1} {'type': 'loss', 'content': 0.21848079562187195, 'timestamp': '2025-10-01 04:24:08.475622', 'step': 5933, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:08.530660', 'step': 5933, 'epoch': 1} {'type': 'loss', 'content': 0.12552884221076965, 'timestamp': '2025-10-01 04:24:08.532842', 'step': 5934, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:08.586430', 'step': 5934, 'epoch': 1} {'type': 'loss', 'content': 0.1965164840221405, 'timestamp': '2025-10-01 04:24:08.588631', 'step': 5935, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:08.642197', 'step': 5935, 'epoch': 1} {'type': 'loss', 'content': 0.12179029732942581, 'timestamp': '2025-10-01 04:24:08.656034', 'step': 5936, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:24:08.708875', 'step': 5936, 'epoch': 1} {'type': 'loss', 'content': 0.1296277642250061, 'timestamp': '2025-10-01 04:24:08.711233', 'step': 5937, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:08.765319', 'step': 5937, 'epoch': 1} {'type': 'loss', 'content': 0.17066478729248047, 'timestamp': '2025-10-01 04:24:08.769043', 'step': 5938, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:08.822697', 'step': 5938, 'epoch': 1} {'type': 'loss', 'content': 0.16447395086288452, 'timestamp': '2025-10-01 04:24:08.824798', 'step': 5939, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:08.878745', 'step': 5939, 'epoch': 1} {'type': 'loss', 'content': 0.3447551429271698, 'timestamp': '2025-10-01 04:24:08.899474', 'step': 5940, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:08.952391', 'step': 5940, 'epoch': 1} {'type': 'loss', 'content': 0.19337454438209534, 'timestamp': '2025-10-01 04:24:08.954571', 'step': 5941, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:09.007377', 'step': 5941, 'epoch': 1} {'type': 'loss', 'content': 0.15444394946098328, 'timestamp': '2025-10-01 04:24:09.009527', 'step': 5942, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:24:09.063680', 'step': 5942, 'epoch': 1} {'type': 'loss', 'content': 0.09148935973644257, 'timestamp': '2025-10-01 04:24:09.065894', 'step': 5943, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:09.119935', 'step': 5943, 'epoch': 1} {'type': 'loss', 'content': 0.18161645531654358, 'timestamp': '2025-10-01 04:24:09.125946', 'step': 5944, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:09.179541', 'step': 5944, 'epoch': 1} {'type': 'loss', 'content': 0.23617027699947357, 'timestamp': '2025-10-01 04:24:09.181604', 'step': 5945, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:09.234467', 'step': 5945, 'epoch': 1} {'type': 'loss', 'content': 0.12014425545930862, 'timestamp': '2025-10-01 04:24:09.236594', 'step': 5946, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:09.289853', 'step': 5946, 'epoch': 1} {'type': 'loss', 'content': 0.15177375078201294, 'timestamp': '2025-10-01 04:24:09.292017', 'step': 5947, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:09.345461', 'step': 5947, 'epoch': 1} {'type': 'loss', 'content': 0.2016899436712265, 'timestamp': '2025-10-01 04:24:09.351347', 'step': 5948, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:09.405253', 'step': 5948, 'epoch': 1} {'type': 'loss', 'content': 0.21004758775234222, 'timestamp': '2025-10-01 04:24:09.407366', 'step': 5949, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:09.461138', 'step': 5949, 'epoch': 1} {'type': 'loss', 'content': 0.1969623565673828, 'timestamp': '2025-10-01 04:24:09.463038', 'step': 5950, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:24:09.517033', 'step': 5950, 'epoch': 1} {'type': 'loss', 'content': 0.1726161539554596, 'timestamp': '2025-10-01 04:24:09.519049', 'step': 5951, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:09.573484', 'step': 5951, 'epoch': 1} {'type': 'loss', 'content': 0.16533182561397552, 'timestamp': '2025-10-01 04:24:09.579718', 'step': 5952, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:09.632773', 'step': 5952, 'epoch': 1} {'type': 'loss', 'content': 0.148141548037529, 'timestamp': '2025-10-01 04:24:09.634795', 'step': 5953, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:09.688361', 'step': 5953, 'epoch': 1} {'type': 'loss', 'content': 0.11546500772237778, 'timestamp': '2025-10-01 04:24:09.690651', 'step': 5954, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:09.743867', 'step': 5954, 'epoch': 1} {'type': 'loss', 'content': 0.13407351076602936, 'timestamp': '2025-10-01 04:24:09.746880', 'step': 5955, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:09.800254', 'step': 5955, 'epoch': 1} {'type': 'loss', 'content': 0.1297142505645752, 'timestamp': '2025-10-01 04:24:09.806153', 'step': 5956, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:09.858731', 'step': 5956, 'epoch': 1} {'type': 'loss', 'content': 0.1421499103307724, 'timestamp': '2025-10-01 04:24:09.860789', 'step': 5957, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:09.914349', 'step': 5957, 'epoch': 1} {'type': 'loss', 'content': 0.17414842545986176, 'timestamp': '2025-10-01 04:24:09.916569', 'step': 5958, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:09.970521', 'step': 5958, 'epoch': 1} {'type': 'loss', 'content': 0.19790920615196228, 'timestamp': '2025-10-01 04:24:09.972686', 'step': 5959, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:10.026995', 'step': 5959, 'epoch': 1} {'type': 'loss', 'content': 0.07565803080797195, 'timestamp': '2025-10-01 04:24:10.042476', 'step': 5960, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:10.095983', 'step': 5960, 'epoch': 1} {'type': 'loss', 'content': 0.26443636417388916, 'timestamp': '2025-10-01 04:24:10.098006', 'step': 5961, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:10.150634', 'step': 5961, 'epoch': 1} {'type': 'loss', 'content': 0.12674984335899353, 'timestamp': '2025-10-01 04:24:10.152585', 'step': 5962, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:10.206631', 'step': 5962, 'epoch': 1} {'type': 'loss', 'content': 0.11445042490959167, 'timestamp': '2025-10-01 04:24:10.208620', 'step': 5963, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:10.261674', 'step': 5963, 'epoch': 1} {'type': 'loss', 'content': 0.08494153618812561, 'timestamp': '2025-10-01 04:24:10.267137', 'step': 5964, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:10.319862', 'step': 5964, 'epoch': 1} {'type': 'loss', 'content': 0.11863717436790466, 'timestamp': '2025-10-01 04:24:10.321760', 'step': 5965, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:10.374813', 'step': 5965, 'epoch': 1} {'type': 'loss', 'content': 0.13816861808300018, 'timestamp': '2025-10-01 04:24:10.376965', 'step': 5966, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:10.429709', 'step': 5966, 'epoch': 1} {'type': 'loss', 'content': 0.11809537559747696, 'timestamp': '2025-10-01 04:24:10.431618', 'step': 5967, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:24:10.484634', 'step': 5967, 'epoch': 1} {'type': 'loss', 'content': 0.1635003685951233, 'timestamp': '2025-10-01 04:24:10.490891', 'step': 5968, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:10.544682', 'step': 5968, 'epoch': 1} {'type': 'loss', 'content': 0.10726917535066605, 'timestamp': '2025-10-01 04:24:10.546622', 'step': 5969, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:10.600425', 'step': 5969, 'epoch': 1} {'type': 'loss', 'content': 0.17403896152973175, 'timestamp': '2025-10-01 04:24:10.602520', 'step': 5970, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:24:10.656809', 'step': 5970, 'epoch': 1} {'type': 'loss', 'content': 0.11891050636768341, 'timestamp': '2025-10-01 04:24:10.658719', 'step': 5971, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-01 04:24:10.717280', 'step': 5971, 'epoch': 1} {'type': 'loss', 'content': 0.13745959103107452, 'timestamp': '2025-10-01 04:24:10.724906', 'step': 5972, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:10.778608', 'step': 5972, 'epoch': 1} {'type': 'loss', 'content': 0.15404386818408966, 'timestamp': '2025-10-01 04:24:10.780882', 'step': 5973, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:10.834758', 'step': 5973, 'epoch': 1} {'type': 'loss', 'content': 0.1359262466430664, 'timestamp': '2025-10-01 04:24:10.836992', 'step': 5974, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:10.890864', 'step': 5974, 'epoch': 1} {'type': 'loss', 'content': 0.22109435498714447, 'timestamp': '2025-10-01 04:24:10.892783', 'step': 5975, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:10.945344', 'step': 5975, 'epoch': 1} {'type': 'loss', 'content': 0.14539611339569092, 'timestamp': '2025-10-01 04:24:10.951221', 'step': 5976, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:11.004903', 'step': 5976, 'epoch': 1} {'type': 'loss', 'content': 0.1515175700187683, 'timestamp': '2025-10-01 04:24:11.006897', 'step': 5977, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:11.061456', 'step': 5977, 'epoch': 1} {'type': 'loss', 'content': 0.11847931891679764, 'timestamp': '2025-10-01 04:24:11.064163', 'step': 5978, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:24:11.119703', 'step': 5978, 'epoch': 1} {'type': 'loss', 'content': 0.2476159930229187, 'timestamp': '2025-10-01 04:24:11.122242', 'step': 5979, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:24:11.176127', 'step': 5979, 'epoch': 1} {'type': 'loss', 'content': 0.1401684284210205, 'timestamp': '2025-10-01 04:24:11.182020', 'step': 5980, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:11.235263', 'step': 5980, 'epoch': 1} {'type': 'loss', 'content': 0.14485394954681396, 'timestamp': '2025-10-01 04:24:11.237437', 'step': 5981, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:11.295380', 'step': 5981, 'epoch': 1} {'type': 'loss', 'content': 0.17428860068321228, 'timestamp': '2025-10-01 04:24:11.297838', 'step': 5982, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:11.364188', 'step': 5982, 'epoch': 1} {'type': 'loss', 'content': 0.18267826735973358, 'timestamp': '2025-10-01 04:24:11.366368', 'step': 5983, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:11.420692', 'step': 5983, 'epoch': 1} {'type': 'loss', 'content': 0.2951788008213043, 'timestamp': '2025-10-01 04:24:11.426649', 'step': 5984, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:11.480396', 'step': 5984, 'epoch': 1} {'type': 'loss', 'content': 0.15277935564517975, 'timestamp': '2025-10-01 04:24:11.482467', 'step': 5985, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:11.548455', 'step': 5985, 'epoch': 1} {'type': 'loss', 'content': 0.09336791187524796, 'timestamp': '2025-10-01 04:24:11.550754', 'step': 5986, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:11.604866', 'step': 5986, 'epoch': 1} {'type': 'loss', 'content': 0.12411165237426758, 'timestamp': '2025-10-01 04:24:11.607396', 'step': 5987, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:11.661392', 'step': 5987, 'epoch': 1} {'type': 'loss', 'content': 0.14954546093940735, 'timestamp': '2025-10-01 04:24:11.667566', 'step': 5988, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:11.721359', 'step': 5988, 'epoch': 1} {'type': 'loss', 'content': 0.12413952499628067, 'timestamp': '2025-10-01 04:24:11.723945', 'step': 5989, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:11.778166', 'step': 5989, 'epoch': 1} {'type': 'loss', 'content': 0.24696084856987, 'timestamp': '2025-10-01 04:24:11.780610', 'step': 5990, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:11.844201', 'step': 5990, 'epoch': 1} {'type': 'loss', 'content': 0.11006538569927216, 'timestamp': '2025-10-01 04:24:11.846412', 'step': 5991, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:11.903839', 'step': 5991, 'epoch': 1} {'type': 'loss', 'content': 0.08245661854743958, 'timestamp': '2025-10-01 04:24:11.924003', 'step': 5992, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:11.977468', 'step': 5992, 'epoch': 1} {'type': 'loss', 'content': 0.22757866978645325, 'timestamp': '2025-10-01 04:24:11.979786', 'step': 5993, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:12.036018', 'step': 5993, 'epoch': 1} {'type': 'loss', 'content': 0.17711636424064636, 'timestamp': '2025-10-01 04:24:12.038031', 'step': 5994, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:12.091871', 'step': 5994, 'epoch': 1} {'type': 'loss', 'content': 0.2475823014974594, 'timestamp': '2025-10-01 04:24:12.096696', 'step': 5995, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:12.150795', 'step': 5995, 'epoch': 1} {'type': 'loss', 'content': 0.1690734624862671, 'timestamp': '2025-10-01 04:24:12.156872', 'step': 5996, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:12.211387', 'step': 5996, 'epoch': 1} {'type': 'loss', 'content': 0.16048011183738708, 'timestamp': '2025-10-01 04:24:12.213553', 'step': 5997, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:12.269334', 'step': 5997, 'epoch': 1} {'type': 'loss', 'content': 0.15473534166812897, 'timestamp': '2025-10-01 04:24:12.271835', 'step': 5998, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:12.342026', 'step': 5998, 'epoch': 1} {'type': 'loss', 'content': 0.20388327538967133, 'timestamp': '2025-10-01 04:24:12.344540', 'step': 5999, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:12.399658', 'step': 5999, 'epoch': 1} {'type': 'loss', 'content': 0.2101440727710724, 'timestamp': '2025-10-01 04:24:12.405570', 'step': 6000, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 6000', 'timestamp': '2025-10-01 04:24:12.770424', 'step': 6000, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:12.826392', 'step': 6000, 'epoch': 1} {'type': 'loss', 'content': 0.14776232838630676, 'timestamp': '2025-10-01 04:24:12.828647', 'step': 6001, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:12.882573', 'step': 6001, 'epoch': 1} {'type': 'loss', 'content': 0.1727980077266693, 'timestamp': '2025-10-01 04:24:12.884907', 'step': 6002, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:12.956192', 'step': 6002, 'epoch': 1} {'type': 'loss', 'content': 0.18490926921367645, 'timestamp': '2025-10-01 04:24:12.958287', 'step': 6003, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:13.011518', 'step': 6003, 'epoch': 1} {'type': 'loss', 'content': 0.12176579236984253, 'timestamp': '2025-10-01 04:24:13.017043', 'step': 6004, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:13.069378', 'step': 6004, 'epoch': 1} {'type': 'loss', 'content': 0.14690342545509338, 'timestamp': '2025-10-01 04:24:13.071600', 'step': 6005, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:13.127869', 'step': 6005, 'epoch': 1} {'type': 'loss', 'content': 0.09638124704360962, 'timestamp': '2025-10-01 04:24:13.130070', 'step': 6006, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:13.183433', 'step': 6006, 'epoch': 1} {'type': 'loss', 'content': 0.21409747004508972, 'timestamp': '2025-10-01 04:24:13.185732', 'step': 6007, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:13.239180', 'step': 6007, 'epoch': 1} {'type': 'loss', 'content': 0.08985639363527298, 'timestamp': '2025-10-01 04:24:13.254289', 'step': 6008, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:13.309423', 'step': 6008, 'epoch': 1} {'type': 'loss', 'content': 0.12175358831882477, 'timestamp': '2025-10-01 04:24:13.321284', 'step': 6009, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:13.374682', 'step': 6009, 'epoch': 1} {'type': 'loss', 'content': 0.15142227709293365, 'timestamp': '2025-10-01 04:24:13.376692', 'step': 6010, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:13.436461', 'step': 6010, 'epoch': 1} {'type': 'loss', 'content': 0.18950136005878448, 'timestamp': '2025-10-01 04:24:13.438891', 'step': 6011, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:13.510948', 'step': 6011, 'epoch': 1} {'type': 'loss', 'content': 0.14591918885707855, 'timestamp': '2025-10-01 04:24:13.517204', 'step': 6012, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:13.571207', 'step': 6012, 'epoch': 1} {'type': 'loss', 'content': 0.1904735416173935, 'timestamp': '2025-10-01 04:24:13.573901', 'step': 6013, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:13.627104', 'step': 6013, 'epoch': 1} {'type': 'loss', 'content': 0.18775789439678192, 'timestamp': '2025-10-01 04:24:13.629881', 'step': 6014, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:24:13.683935', 'step': 6014, 'epoch': 1} {'type': 'loss', 'content': 0.12662553787231445, 'timestamp': '2025-10-01 04:24:13.686940', 'step': 6015, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:13.741845', 'step': 6015, 'epoch': 1} {'type': 'loss', 'content': 0.1714416742324829, 'timestamp': '2025-10-01 04:24:13.749073', 'step': 6016, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:13.802795', 'step': 6016, 'epoch': 1} {'type': 'loss', 'content': 0.1573283076286316, 'timestamp': '2025-10-01 04:24:13.804782', 'step': 6017, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:13.857621', 'step': 6017, 'epoch': 1} {'type': 'loss', 'content': 0.1041945219039917, 'timestamp': '2025-10-01 04:24:13.871327', 'step': 6018, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:13.925172', 'step': 6018, 'epoch': 1} {'type': 'loss', 'content': 0.1019430011510849, 'timestamp': '2025-10-01 04:24:13.928657', 'step': 6019, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:13.985573', 'step': 6019, 'epoch': 1} {'type': 'loss', 'content': 0.22066698968410492, 'timestamp': '2025-10-01 04:24:14.003699', 'step': 6020, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:14.057742', 'step': 6020, 'epoch': 1} {'type': 'loss', 'content': 0.18604058027267456, 'timestamp': '2025-10-01 04:24:14.059881', 'step': 6021, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:14.113347', 'step': 6021, 'epoch': 1} {'type': 'loss', 'content': 0.11040819436311722, 'timestamp': '2025-10-01 04:24:14.115435', 'step': 6022, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:14.168626', 'step': 6022, 'epoch': 1} {'type': 'loss', 'content': 0.10490509867668152, 'timestamp': '2025-10-01 04:24:14.170865', 'step': 6023, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:14.223818', 'step': 6023, 'epoch': 1} {'type': 'loss', 'content': 0.15633514523506165, 'timestamp': '2025-10-01 04:24:14.229520', 'step': 6024, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:14.282863', 'step': 6024, 'epoch': 1} {'type': 'loss', 'content': 0.13440881669521332, 'timestamp': '2025-10-01 04:24:14.284783', 'step': 6025, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:14.337505', 'step': 6025, 'epoch': 1} {'type': 'loss', 'content': 0.1449076533317566, 'timestamp': '2025-10-01 04:24:14.339773', 'step': 6026, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:14.393317', 'step': 6026, 'epoch': 1} {'type': 'loss', 'content': 0.10177282243967056, 'timestamp': '2025-10-01 04:24:14.395451', 'step': 6027, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:14.449333', 'step': 6027, 'epoch': 1} {'type': 'loss', 'content': 0.159255251288414, 'timestamp': '2025-10-01 04:24:14.455170', 'step': 6028, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:14.507959', 'step': 6028, 'epoch': 1} {'type': 'loss', 'content': 0.1664862185716629, 'timestamp': '2025-10-01 04:24:14.510231', 'step': 6029, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:14.564554', 'step': 6029, 'epoch': 1} {'type': 'loss', 'content': 0.1197880282998085, 'timestamp': '2025-10-01 04:24:14.567679', 'step': 6030, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:14.620986', 'step': 6030, 'epoch': 1} {'type': 'loss', 'content': 0.08068329095840454, 'timestamp': '2025-10-01 04:24:14.623071', 'step': 6031, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:14.675821', 'step': 6031, 'epoch': 1} {'type': 'loss', 'content': 0.15937566757202148, 'timestamp': '2025-10-01 04:24:14.681426', 'step': 6032, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:14.734345', 'step': 6032, 'epoch': 1} {'type': 'loss', 'content': 0.17906570434570312, 'timestamp': '2025-10-01 04:24:14.736304', 'step': 6033, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:14.789635', 'step': 6033, 'epoch': 1} {'type': 'loss', 'content': 0.1731926053762436, 'timestamp': '2025-10-01 04:24:14.791916', 'step': 6034, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:14.846410', 'step': 6034, 'epoch': 1} {'type': 'loss', 'content': 0.22024713456630707, 'timestamp': '2025-10-01 04:24:14.849776', 'step': 6035, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:14.915085', 'step': 6035, 'epoch': 1} {'type': 'loss', 'content': 0.2429150640964508, 'timestamp': '2025-10-01 04:24:14.920908', 'step': 6036, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:14.973378', 'step': 6036, 'epoch': 1} {'type': 'loss', 'content': 0.20094670355319977, 'timestamp': '2025-10-01 04:24:14.980144', 'step': 6037, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:15.032917', 'step': 6037, 'epoch': 1} {'type': 'loss', 'content': 0.13666047155857086, 'timestamp': '2025-10-01 04:24:15.036585', 'step': 6038, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:15.090216', 'step': 6038, 'epoch': 1} {'type': 'loss', 'content': 0.18394796550273895, 'timestamp': '2025-10-01 04:24:15.092535', 'step': 6039, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:15.155980', 'step': 6039, 'epoch': 1} {'type': 'loss', 'content': 0.10859556496143341, 'timestamp': '2025-10-01 04:24:15.175004', 'step': 6040, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:15.229869', 'step': 6040, 'epoch': 1} {'type': 'loss', 'content': 0.18390047550201416, 'timestamp': '2025-10-01 04:24:15.231881', 'step': 6041, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:15.286867', 'step': 6041, 'epoch': 1} {'type': 'loss', 'content': 0.16029316186904907, 'timestamp': '2025-10-01 04:24:15.288778', 'step': 6042, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:15.342705', 'step': 6042, 'epoch': 1} {'type': 'loss', 'content': 0.2144325226545334, 'timestamp': '2025-10-01 04:24:15.344988', 'step': 6043, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:15.397721', 'step': 6043, 'epoch': 1} {'type': 'loss', 'content': 0.12365258485078812, 'timestamp': '2025-10-01 04:24:15.403569', 'step': 6044, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:15.462030', 'step': 6044, 'epoch': 1} {'type': 'loss', 'content': 0.1298970729112625, 'timestamp': '2025-10-01 04:24:15.464317', 'step': 6045, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:15.517410', 'step': 6045, 'epoch': 1} {'type': 'loss', 'content': 0.15613241493701935, 'timestamp': '2025-10-01 04:24:15.520093', 'step': 6046, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:15.574566', 'step': 6046, 'epoch': 1} {'type': 'loss', 'content': 0.13050433993339539, 'timestamp': '2025-10-01 04:24:15.576988', 'step': 6047, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:15.632778', 'step': 6047, 'epoch': 1} {'type': 'loss', 'content': 0.17315146327018738, 'timestamp': '2025-10-01 04:24:15.638456', 'step': 6048, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:15.691611', 'step': 6048, 'epoch': 1} {'type': 'loss', 'content': 0.10888755321502686, 'timestamp': '2025-10-01 04:24:15.693832', 'step': 6049, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:15.746871', 'step': 6049, 'epoch': 1} {'type': 'loss', 'content': 0.1627204716205597, 'timestamp': '2025-10-01 04:24:15.749248', 'step': 6050, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:24:15.802840', 'step': 6050, 'epoch': 1} {'type': 'loss', 'content': 0.2702409327030182, 'timestamp': '2025-10-01 04:24:15.805401', 'step': 6051, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:15.860360', 'step': 6051, 'epoch': 1} {'type': 'loss', 'content': 0.13676215708255768, 'timestamp': '2025-10-01 04:24:15.866326', 'step': 6052, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:24:15.921258', 'step': 6052, 'epoch': 1} {'type': 'loss', 'content': 0.08683105558156967, 'timestamp': '2025-10-01 04:24:15.923383', 'step': 6053, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:15.978310', 'step': 6053, 'epoch': 1} {'type': 'loss', 'content': 0.11023148894309998, 'timestamp': '2025-10-01 04:24:15.980968', 'step': 6054, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:16.033493', 'step': 6054, 'epoch': 1} {'type': 'loss', 'content': 0.14714448153972626, 'timestamp': '2025-10-01 04:24:16.036193', 'step': 6055, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:16.094838', 'step': 6055, 'epoch': 1} {'type': 'loss', 'content': 0.1805717796087265, 'timestamp': '2025-10-01 04:24:16.110371', 'step': 6056, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:16.171817', 'step': 6056, 'epoch': 1} {'type': 'loss', 'content': 0.1121651828289032, 'timestamp': '2025-10-01 04:24:16.173839', 'step': 6057, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:16.237405', 'step': 6057, 'epoch': 1} {'type': 'loss', 'content': 0.16192692518234253, 'timestamp': '2025-10-01 04:24:16.239928', 'step': 6058, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-01 04:24:29.689102', 'step': 6058, 'epoch': 1} {'type': 'pplx', 'content': 11859.114208798574, 'timestamp': '2025-10-01 04:24:29.692837', 'step': 6058, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:29.759121', 'step': 6058, 'epoch': 1} {'type': 'loss', 'content': 0.3217789828777313, 'timestamp': '2025-10-01 04:24:29.761899', 'step': 6059, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:29.817687', 'step': 6059, 'epoch': 1} {'type': 'loss', 'content': 0.1938820630311966, 'timestamp': '2025-10-01 04:24:29.823983', 'step': 6060, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:29.877174', 'step': 6060, 'epoch': 1} {'type': 'loss', 'content': 0.18021303415298462, 'timestamp': '2025-10-01 04:24:29.879334', 'step': 6061, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:29.932738', 'step': 6061, 'epoch': 1} {'type': 'loss', 'content': 0.1848822832107544, 'timestamp': '2025-10-01 04:24:29.943312', 'step': 6062, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:29.997434', 'step': 6062, 'epoch': 1} {'type': 'loss', 'content': 0.07090411335229874, 'timestamp': '2025-10-01 04:24:30.009515', 'step': 6063, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:30.064189', 'step': 6063, 'epoch': 1} {'type': 'loss', 'content': 0.08711408823728561, 'timestamp': '2025-10-01 04:24:30.070066', 'step': 6064, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:30.124267', 'step': 6064, 'epoch': 1} {'type': 'loss', 'content': 0.23843343555927277, 'timestamp': '2025-10-01 04:24:30.126528', 'step': 6065, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:30.186730', 'step': 6065, 'epoch': 1} {'type': 'loss', 'content': 0.09255173802375793, 'timestamp': '2025-10-01 04:24:30.194415', 'step': 6066, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:30.253700', 'step': 6066, 'epoch': 1} {'type': 'loss', 'content': 0.16966189444065094, 'timestamp': '2025-10-01 04:24:30.258492', 'step': 6067, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:30.324654', 'step': 6067, 'epoch': 1} {'type': 'loss', 'content': 0.11557023972272873, 'timestamp': '2025-10-01 04:24:30.330926', 'step': 6068, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:24:30.390571', 'step': 6068, 'epoch': 1} {'type': 'loss', 'content': 0.17748650908470154, 'timestamp': '2025-10-01 04:24:30.393042', 'step': 6069, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:30.452462', 'step': 6069, 'epoch': 1} {'type': 'loss', 'content': 0.12941133975982666, 'timestamp': '2025-10-01 04:24:30.455832', 'step': 6070, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:30.510550', 'step': 6070, 'epoch': 1} {'type': 'loss', 'content': 0.23540130257606506, 'timestamp': '2025-10-01 04:24:30.513230', 'step': 6071, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:30.568398', 'step': 6071, 'epoch': 1} {'type': 'loss', 'content': 0.13685326278209686, 'timestamp': '2025-10-01 04:24:30.574623', 'step': 6072, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:30.628332', 'step': 6072, 'epoch': 1} {'type': 'loss', 'content': 0.10598359256982803, 'timestamp': '2025-10-01 04:24:30.630467', 'step': 6073, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:30.684651', 'step': 6073, 'epoch': 1} {'type': 'loss', 'content': 0.1554841846227646, 'timestamp': '2025-10-01 04:24:30.687223', 'step': 6074, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:30.741269', 'step': 6074, 'epoch': 1} {'type': 'loss', 'content': 0.15612392127513885, 'timestamp': '2025-10-01 04:24:30.743911', 'step': 6075, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:24:30.799420', 'step': 6075, 'epoch': 1} {'type': 'loss', 'content': 0.19064967334270477, 'timestamp': '2025-10-01 04:24:30.805737', 'step': 6076, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:30.859130', 'step': 6076, 'epoch': 1} {'type': 'loss', 'content': 0.12668676674365997, 'timestamp': '2025-10-01 04:24:30.862014', 'step': 6077, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:30.916043', 'step': 6077, 'epoch': 1} {'type': 'loss', 'content': 0.09110735356807709, 'timestamp': '2025-10-01 04:24:30.919144', 'step': 6078, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:30.972035', 'step': 6078, 'epoch': 1} {'type': 'loss', 'content': 0.14703024923801422, 'timestamp': '2025-10-01 04:24:30.974613', 'step': 6079, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:31.028072', 'step': 6079, 'epoch': 1} {'type': 'loss', 'content': 0.19455422461032867, 'timestamp': '2025-10-01 04:24:31.034923', 'step': 6080, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:31.089063', 'step': 6080, 'epoch': 1} {'type': 'loss', 'content': 0.2149079144001007, 'timestamp': '2025-10-01 04:24:31.091682', 'step': 6081, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:31.144833', 'step': 6081, 'epoch': 1} {'type': 'loss', 'content': 0.0858314260840416, 'timestamp': '2025-10-01 04:24:31.147553', 'step': 6082, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:31.201241', 'step': 6082, 'epoch': 1} {'type': 'loss', 'content': 0.14057405292987823, 'timestamp': '2025-10-01 04:24:31.210163', 'step': 6083, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:31.263643', 'step': 6083, 'epoch': 1} {'type': 'loss', 'content': 0.11956685036420822, 'timestamp': '2025-10-01 04:24:31.269741', 'step': 6084, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:31.323568', 'step': 6084, 'epoch': 1} {'type': 'loss', 'content': 0.20331968367099762, 'timestamp': '2025-10-01 04:24:31.325829', 'step': 6085, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:31.379644', 'step': 6085, 'epoch': 1} {'type': 'loss', 'content': 0.1834106594324112, 'timestamp': '2025-10-01 04:24:31.381743', 'step': 6086, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:31.434660', 'step': 6086, 'epoch': 1} {'type': 'loss', 'content': 0.3260161280632019, 'timestamp': '2025-10-01 04:24:31.436793', 'step': 6087, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:31.503359', 'step': 6087, 'epoch': 1} {'type': 'loss', 'content': 0.20471793413162231, 'timestamp': '2025-10-01 04:24:31.509348', 'step': 6088, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:31.561898', 'step': 6088, 'epoch': 1} {'type': 'loss', 'content': 0.10136818885803223, 'timestamp': '2025-10-01 04:24:31.563908', 'step': 6089, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:31.616500', 'step': 6089, 'epoch': 1} {'type': 'loss', 'content': 0.20506684482097626, 'timestamp': '2025-10-01 04:24:31.618498', 'step': 6090, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:31.671746', 'step': 6090, 'epoch': 1} {'type': 'loss', 'content': 0.1410118192434311, 'timestamp': '2025-10-01 04:24:31.674123', 'step': 6091, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:31.727541', 'step': 6091, 'epoch': 1} {'type': 'loss', 'content': 0.22785358130931854, 'timestamp': '2025-10-01 04:24:31.733074', 'step': 6092, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:31.785259', 'step': 6092, 'epoch': 1} {'type': 'loss', 'content': 0.1238330602645874, 'timestamp': '2025-10-01 04:24:31.787456', 'step': 6093, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:31.840204', 'step': 6093, 'epoch': 1} {'type': 'loss', 'content': 0.10946711897850037, 'timestamp': '2025-10-01 04:24:31.842377', 'step': 6094, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:31.896162', 'step': 6094, 'epoch': 1} {'type': 'loss', 'content': 0.2519576847553253, 'timestamp': '2025-10-01 04:24:31.898279', 'step': 6095, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:31.961853', 'step': 6095, 'epoch': 1} {'type': 'loss', 'content': 0.2228369265794754, 'timestamp': '2025-10-01 04:24:31.968259', 'step': 6096, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:32.022314', 'step': 6096, 'epoch': 1} {'type': 'loss', 'content': 0.12502893805503845, 'timestamp': '2025-10-01 04:24:32.027268', 'step': 6097, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:32.089763', 'step': 6097, 'epoch': 1} {'type': 'loss', 'content': 0.11480318009853363, 'timestamp': '2025-10-01 04:24:32.093596', 'step': 6098, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:32.152415', 'step': 6098, 'epoch': 1} {'type': 'loss', 'content': 0.15463830530643463, 'timestamp': '2025-10-01 04:24:32.154916', 'step': 6099, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:32.215620', 'step': 6099, 'epoch': 1} {'type': 'loss', 'content': 0.11307504028081894, 'timestamp': '2025-10-01 04:24:32.221472', 'step': 6100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:32.273896', 'step': 6100, 'epoch': 1} {'type': 'loss', 'content': 0.13110792636871338, 'timestamp': '2025-10-01 04:24:32.283273', 'step': 6101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:32.339046', 'step': 6101, 'epoch': 1} {'type': 'loss', 'content': 0.13103000819683075, 'timestamp': '2025-10-01 04:24:32.343924', 'step': 6102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:32.401852', 'step': 6102, 'epoch': 1} {'type': 'loss', 'content': 0.14878565073013306, 'timestamp': '2025-10-01 04:24:32.404073', 'step': 6103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:24:32.456725', 'step': 6103, 'epoch': 1} {'type': 'loss', 'content': 0.17168234288692474, 'timestamp': '2025-10-01 04:24:32.462466', 'step': 6104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:32.515827', 'step': 6104, 'epoch': 1} {'type': 'loss', 'content': 0.156676784157753, 'timestamp': '2025-10-01 04:24:32.518141', 'step': 6105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:32.570654', 'step': 6105, 'epoch': 1} {'type': 'loss', 'content': 0.13502536714076996, 'timestamp': '2025-10-01 04:24:32.572791', 'step': 6106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:32.625787', 'step': 6106, 'epoch': 1} {'type': 'loss', 'content': 0.2084154188632965, 'timestamp': '2025-10-01 04:24:32.627763', 'step': 6107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:32.683184', 'step': 6107, 'epoch': 1} {'type': 'loss', 'content': 0.19232285022735596, 'timestamp': '2025-10-01 04:24:32.688842', 'step': 6108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:32.741180', 'step': 6108, 'epoch': 1} {'type': 'loss', 'content': 0.17930111289024353, 'timestamp': '2025-10-01 04:24:32.744937', 'step': 6109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:32.797820', 'step': 6109, 'epoch': 1} {'type': 'loss', 'content': 0.13074931502342224, 'timestamp': '2025-10-01 04:24:32.800358', 'step': 6110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:32.854210', 'step': 6110, 'epoch': 1} {'type': 'loss', 'content': 0.16892468929290771, 'timestamp': '2025-10-01 04:24:32.858332', 'step': 6111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:32.916099', 'step': 6111, 'epoch': 1} {'type': 'loss', 'content': 0.16951264441013336, 'timestamp': '2025-10-01 04:24:32.923651', 'step': 6112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:32.985293', 'step': 6112, 'epoch': 1} {'type': 'loss', 'content': 0.139516681432724, 'timestamp': '2025-10-01 04:24:32.987537', 'step': 6113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:33.047081', 'step': 6113, 'epoch': 1} {'type': 'loss', 'content': 0.1354895383119583, 'timestamp': '2025-10-01 04:24:33.049264', 'step': 6114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:33.102554', 'step': 6114, 'epoch': 1} {'type': 'loss', 'content': 0.11476919054985046, 'timestamp': '2025-10-01 04:24:33.105268', 'step': 6115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:33.163231', 'step': 6115, 'epoch': 1} {'type': 'loss', 'content': 0.1975734829902649, 'timestamp': '2025-10-01 04:24:33.168947', 'step': 6116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:33.223646', 'step': 6116, 'epoch': 1} {'type': 'loss', 'content': 0.18317393958568573, 'timestamp': '2025-10-01 04:24:33.228896', 'step': 6117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:24:33.287002', 'step': 6117, 'epoch': 1} {'type': 'loss', 'content': 0.12922634184360504, 'timestamp': '2025-10-01 04:24:33.289139', 'step': 6118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:33.342316', 'step': 6118, 'epoch': 1} {'type': 'loss', 'content': 0.1410265415906906, 'timestamp': '2025-10-01 04:24:33.344770', 'step': 6119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:33.403928', 'step': 6119, 'epoch': 1} {'type': 'loss', 'content': 0.21284933388233185, 'timestamp': '2025-10-01 04:24:33.417171', 'step': 6120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:33.472168', 'step': 6120, 'epoch': 1} {'type': 'loss', 'content': 0.10564475506544113, 'timestamp': '2025-10-01 04:24:33.482953', 'step': 6121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:33.552339', 'step': 6121, 'epoch': 1} {'type': 'loss', 'content': 0.20280516147613525, 'timestamp': '2025-10-01 04:24:33.555800', 'step': 6122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:33.623952', 'step': 6122, 'epoch': 1} {'type': 'loss', 'content': 0.2058020979166031, 'timestamp': '2025-10-01 04:24:33.626266', 'step': 6123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:33.679134', 'step': 6123, 'epoch': 1} {'type': 'loss', 'content': 0.10384665429592133, 'timestamp': '2025-10-01 04:24:33.685730', 'step': 6124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:33.746592', 'step': 6124, 'epoch': 1} {'type': 'loss', 'content': 0.2916984558105469, 'timestamp': '2025-10-01 04:24:33.748780', 'step': 6125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:33.805999', 'step': 6125, 'epoch': 1} {'type': 'loss', 'content': 0.1930924952030182, 'timestamp': '2025-10-01 04:24:33.808658', 'step': 6126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:33.862619', 'step': 6126, 'epoch': 1} {'type': 'loss', 'content': 0.09565416723489761, 'timestamp': '2025-10-01 04:24:33.865038', 'step': 6127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:24:33.920246', 'step': 6127, 'epoch': 1} {'type': 'loss', 'content': 0.18380574882030487, 'timestamp': '2025-10-01 04:24:33.925983', 'step': 6128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:33.978548', 'step': 6128, 'epoch': 1} {'type': 'loss', 'content': 0.12692773342132568, 'timestamp': '2025-10-01 04:24:33.980622', 'step': 6129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:34.033691', 'step': 6129, 'epoch': 1} {'type': 'loss', 'content': 0.1595340371131897, 'timestamp': '2025-10-01 04:24:34.036236', 'step': 6130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:34.089342', 'step': 6130, 'epoch': 1} {'type': 'loss', 'content': 0.10172762721776962, 'timestamp': '2025-10-01 04:24:34.093629', 'step': 6131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:34.147055', 'step': 6131, 'epoch': 1} {'type': 'loss', 'content': 0.3116708993911743, 'timestamp': '2025-10-01 04:24:34.152611', 'step': 6132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:34.204738', 'step': 6132, 'epoch': 1} {'type': 'loss', 'content': 0.14964717626571655, 'timestamp': '2025-10-01 04:24:34.207140', 'step': 6133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:34.260141', 'step': 6133, 'epoch': 1} {'type': 'loss', 'content': 0.11718699336051941, 'timestamp': '2025-10-01 04:24:34.262268', 'step': 6134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:34.316149', 'step': 6134, 'epoch': 1} {'type': 'loss', 'content': 0.2024679034948349, 'timestamp': '2025-10-01 04:24:34.318223', 'step': 6135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:34.375960', 'step': 6135, 'epoch': 1} {'type': 'loss', 'content': 0.11861221492290497, 'timestamp': '2025-10-01 04:24:34.381506', 'step': 6136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:34.434947', 'step': 6136, 'epoch': 1} {'type': 'loss', 'content': 0.15773949027061462, 'timestamp': '2025-10-01 04:24:34.436923', 'step': 6137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:34.491658', 'step': 6137, 'epoch': 1} {'type': 'loss', 'content': 0.24079088866710663, 'timestamp': '2025-10-01 04:24:34.494211', 'step': 6138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:34.546988', 'step': 6138, 'epoch': 1} {'type': 'loss', 'content': 0.24138422310352325, 'timestamp': '2025-10-01 04:24:34.549729', 'step': 6139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:34.604180', 'step': 6139, 'epoch': 1} {'type': 'loss', 'content': 0.13558940589427948, 'timestamp': '2025-10-01 04:24:34.609842', 'step': 6140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:34.663647', 'step': 6140, 'epoch': 1} {'type': 'loss', 'content': 0.14657744765281677, 'timestamp': '2025-10-01 04:24:34.666942', 'step': 6141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:34.720541', 'step': 6141, 'epoch': 1} {'type': 'loss', 'content': 0.27482038736343384, 'timestamp': '2025-10-01 04:24:34.722668', 'step': 6142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:34.775660', 'step': 6142, 'epoch': 1} {'type': 'loss', 'content': 0.20249374210834503, 'timestamp': '2025-10-01 04:24:34.778110', 'step': 6143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:34.831024', 'step': 6143, 'epoch': 1} {'type': 'loss', 'content': 0.11314132809638977, 'timestamp': '2025-10-01 04:24:34.836896', 'step': 6144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:34.892378', 'step': 6144, 'epoch': 1} {'type': 'loss', 'content': 0.21441394090652466, 'timestamp': '2025-10-01 04:24:34.894847', 'step': 6145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:34.952324', 'step': 6145, 'epoch': 1} {'type': 'loss', 'content': 0.15474171936511993, 'timestamp': '2025-10-01 04:24:34.959658', 'step': 6146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:35.016605', 'step': 6146, 'epoch': 1} {'type': 'loss', 'content': 0.18449629843235016, 'timestamp': '2025-10-01 04:24:35.019216', 'step': 6147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:35.073820', 'step': 6147, 'epoch': 1} {'type': 'loss', 'content': 0.15424861013889313, 'timestamp': '2025-10-01 04:24:35.079919', 'step': 6148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:35.133672', 'step': 6148, 'epoch': 1} {'type': 'loss', 'content': 0.15260308980941772, 'timestamp': '2025-10-01 04:24:35.136696', 'step': 6149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:35.189134', 'step': 6149, 'epoch': 1} {'type': 'loss', 'content': 0.17405138909816742, 'timestamp': '2025-10-01 04:24:35.191090', 'step': 6150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:35.243924', 'step': 6150, 'epoch': 1} {'type': 'loss', 'content': 0.20471030473709106, 'timestamp': '2025-10-01 04:24:35.246215', 'step': 6151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:35.298784', 'step': 6151, 'epoch': 1} {'type': 'loss', 'content': 0.1793462336063385, 'timestamp': '2025-10-01 04:24:35.304514', 'step': 6152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:35.356877', 'step': 6152, 'epoch': 1} {'type': 'loss', 'content': 0.2592228949069977, 'timestamp': '2025-10-01 04:24:35.361307', 'step': 6153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:35.414225', 'step': 6153, 'epoch': 1} {'type': 'loss', 'content': 0.12197846174240112, 'timestamp': '2025-10-01 04:24:35.420142', 'step': 6154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:35.477398', 'step': 6154, 'epoch': 1} {'type': 'loss', 'content': 0.1928579956293106, 'timestamp': '2025-10-01 04:24:35.479633', 'step': 6155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:35.533543', 'step': 6155, 'epoch': 1} {'type': 'loss', 'content': 0.1999705731868744, 'timestamp': '2025-10-01 04:24:35.539243', 'step': 6156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:24:35.591655', 'step': 6156, 'epoch': 1} {'type': 'loss', 'content': 0.1570005714893341, 'timestamp': '2025-10-01 04:24:35.593593', 'step': 6157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:35.646410', 'step': 6157, 'epoch': 1} {'type': 'loss', 'content': 0.3115723729133606, 'timestamp': '2025-10-01 04:24:35.648398', 'step': 6158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:35.704966', 'step': 6158, 'epoch': 1} {'type': 'loss', 'content': 0.1929384469985962, 'timestamp': '2025-10-01 04:24:35.707122', 'step': 6159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:35.771845', 'step': 6159, 'epoch': 1} {'type': 'loss', 'content': 0.12286382913589478, 'timestamp': '2025-10-01 04:24:35.777656', 'step': 6160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:35.837058', 'step': 6160, 'epoch': 1} {'type': 'loss', 'content': 0.11530062556266785, 'timestamp': '2025-10-01 04:24:35.838987', 'step': 6161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:35.902326', 'step': 6161, 'epoch': 1} {'type': 'loss', 'content': 0.15273135900497437, 'timestamp': '2025-10-01 04:24:35.904422', 'step': 6162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:35.957452', 'step': 6162, 'epoch': 1} {'type': 'loss', 'content': 0.1123906746506691, 'timestamp': '2025-10-01 04:24:35.959577', 'step': 6163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:36.013033', 'step': 6163, 'epoch': 1} {'type': 'loss', 'content': 0.15813881158828735, 'timestamp': '2025-10-01 04:24:36.018485', 'step': 6164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:36.070954', 'step': 6164, 'epoch': 1} {'type': 'loss', 'content': 0.17159855365753174, 'timestamp': '2025-10-01 04:24:36.072992', 'step': 6165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:36.125967', 'step': 6165, 'epoch': 1} {'type': 'loss', 'content': 0.09841900318861008, 'timestamp': '2025-10-01 04:24:36.128192', 'step': 6166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:36.183111', 'step': 6166, 'epoch': 1} {'type': 'loss', 'content': 0.17284922301769257, 'timestamp': '2025-10-01 04:24:36.185597', 'step': 6167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:36.238898', 'step': 6167, 'epoch': 1} {'type': 'loss', 'content': 0.1391548067331314, 'timestamp': '2025-10-01 04:24:36.244664', 'step': 6168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:36.298066', 'step': 6168, 'epoch': 1} {'type': 'loss', 'content': 0.19669103622436523, 'timestamp': '2025-10-01 04:24:36.300221', 'step': 6169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:36.355291', 'step': 6169, 'epoch': 1} {'type': 'loss', 'content': 0.12924312055110931, 'timestamp': '2025-10-01 04:24:36.357462', 'step': 6170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:36.418531', 'step': 6170, 'epoch': 1} {'type': 'loss', 'content': 0.16496926546096802, 'timestamp': '2025-10-01 04:24:36.426148', 'step': 6171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:36.479674', 'step': 6171, 'epoch': 1} {'type': 'loss', 'content': 0.1443103700876236, 'timestamp': '2025-10-01 04:24:36.485091', 'step': 6172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:36.537382', 'step': 6172, 'epoch': 1} {'type': 'loss', 'content': 0.11903063207864761, 'timestamp': '2025-10-01 04:24:36.540689', 'step': 6173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:36.594248', 'step': 6173, 'epoch': 1} {'type': 'loss', 'content': 0.22132785618305206, 'timestamp': '2025-10-01 04:24:36.596403', 'step': 6174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:36.652290', 'step': 6174, 'epoch': 1} {'type': 'loss', 'content': 0.1566888689994812, 'timestamp': '2025-10-01 04:24:36.659123', 'step': 6175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:36.712381', 'step': 6175, 'epoch': 1} {'type': 'loss', 'content': 0.16252079606056213, 'timestamp': '2025-10-01 04:24:36.717943', 'step': 6176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:36.783572', 'step': 6176, 'epoch': 1} {'type': 'loss', 'content': 0.1611083298921585, 'timestamp': '2025-10-01 04:24:36.785620', 'step': 6177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:36.839411', 'step': 6177, 'epoch': 1} {'type': 'loss', 'content': 0.2060515433549881, 'timestamp': '2025-10-01 04:24:36.840959', 'step': 6178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:36.893833', 'step': 6178, 'epoch': 1} {'type': 'loss', 'content': 0.13520438969135284, 'timestamp': '2025-10-01 04:24:36.899201', 'step': 6179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:36.952423', 'step': 6179, 'epoch': 1} {'type': 'loss', 'content': 0.19938261806964874, 'timestamp': '2025-10-01 04:24:36.961360', 'step': 6180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:37.013373', 'step': 6180, 'epoch': 1} {'type': 'loss', 'content': 0.13103614747524261, 'timestamp': '2025-10-01 04:24:37.015383', 'step': 6181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:37.068183', 'step': 6181, 'epoch': 1} {'type': 'loss', 'content': 0.1368364840745926, 'timestamp': '2025-10-01 04:24:37.070810', 'step': 6182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:37.126988', 'step': 6182, 'epoch': 1} {'type': 'loss', 'content': 0.1665605753660202, 'timestamp': '2025-10-01 04:24:37.129947', 'step': 6183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:37.185550', 'step': 6183, 'epoch': 1} {'type': 'loss', 'content': 0.20368900895118713, 'timestamp': '2025-10-01 04:24:37.191240', 'step': 6184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:37.251629', 'step': 6184, 'epoch': 1} {'type': 'loss', 'content': 0.16774460673332214, 'timestamp': '2025-10-01 04:24:37.254320', 'step': 6185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:24:37.311361', 'step': 6185, 'epoch': 1} {'type': 'loss', 'content': 0.11753227561712265, 'timestamp': '2025-10-01 04:24:37.313534', 'step': 6186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:37.367047', 'step': 6186, 'epoch': 1} {'type': 'loss', 'content': 0.24748222529888153, 'timestamp': '2025-10-01 04:24:37.369122', 'step': 6187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:37.422369', 'step': 6187, 'epoch': 1} {'type': 'loss', 'content': 0.17162619531154633, 'timestamp': '2025-10-01 04:24:37.428310', 'step': 6188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:37.484239', 'step': 6188, 'epoch': 1} {'type': 'loss', 'content': 0.08652772754430771, 'timestamp': '2025-10-01 04:24:37.486859', 'step': 6189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:37.540550', 'step': 6189, 'epoch': 1} {'type': 'loss', 'content': 0.19490227103233337, 'timestamp': '2025-10-01 04:24:37.542779', 'step': 6190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:37.596961', 'step': 6190, 'epoch': 1} {'type': 'loss', 'content': 0.09999915212392807, 'timestamp': '2025-10-01 04:24:37.599698', 'step': 6191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:37.653132', 'step': 6191, 'epoch': 1} {'type': 'loss', 'content': 0.2454204559326172, 'timestamp': '2025-10-01 04:24:37.660181', 'step': 6192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:37.712694', 'step': 6192, 'epoch': 1} {'type': 'loss', 'content': 0.1998487263917923, 'timestamp': '2025-10-01 04:24:37.714778', 'step': 6193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:37.768585', 'step': 6193, 'epoch': 1} {'type': 'loss', 'content': 0.20587500929832458, 'timestamp': '2025-10-01 04:24:37.771296', 'step': 6194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:24:37.824584', 'step': 6194, 'epoch': 1} {'type': 'loss', 'content': 0.1799233853816986, 'timestamp': '2025-10-01 04:24:37.826891', 'step': 6195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:37.880338', 'step': 6195, 'epoch': 1} {'type': 'loss', 'content': 0.09481683373451233, 'timestamp': '2025-10-01 04:24:37.886547', 'step': 6196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:37.940791', 'step': 6196, 'epoch': 1} {'type': 'loss', 'content': 0.17495325207710266, 'timestamp': '2025-10-01 04:24:37.943319', 'step': 6197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:24:37.995737', 'step': 6197, 'epoch': 1} {'type': 'loss', 'content': 0.08361197263002396, 'timestamp': '2025-10-01 04:24:38.004911', 'step': 6198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:38.058396', 'step': 6198, 'epoch': 1} {'type': 'loss', 'content': 0.124324731528759, 'timestamp': '2025-10-01 04:24:38.060837', 'step': 6199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:38.113355', 'step': 6199, 'epoch': 1} {'type': 'loss', 'content': 0.12923410534858704, 'timestamp': '2025-10-01 04:24:38.123430', 'step': 6200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:38.186669', 'step': 6200, 'epoch': 1} {'type': 'loss', 'content': 0.23133783042430878, 'timestamp': '2025-10-01 04:24:38.188782', 'step': 6201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:38.241427', 'step': 6201, 'epoch': 1} {'type': 'loss', 'content': 0.1346849799156189, 'timestamp': '2025-10-01 04:24:38.243427', 'step': 6202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:38.296938', 'step': 6202, 'epoch': 1} {'type': 'loss', 'content': 0.16047899425029755, 'timestamp': '2025-10-01 04:24:38.299774', 'step': 6203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:38.352444', 'step': 6203, 'epoch': 1} {'type': 'loss', 'content': 0.1633942574262619, 'timestamp': '2025-10-01 04:24:38.358076', 'step': 6204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:38.411147', 'step': 6204, 'epoch': 1} {'type': 'loss', 'content': 0.13687282800674438, 'timestamp': '2025-10-01 04:24:38.413082', 'step': 6205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:38.466111', 'step': 6205, 'epoch': 1} {'type': 'loss', 'content': 0.26327961683273315, 'timestamp': '2025-10-01 04:24:38.468303', 'step': 6206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:38.522407', 'step': 6206, 'epoch': 1} {'type': 'loss', 'content': 0.12016526609659195, 'timestamp': '2025-10-01 04:24:38.524377', 'step': 6207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:24:38.580957', 'step': 6207, 'epoch': 1} {'type': 'loss', 'content': 0.15800876915454865, 'timestamp': '2025-10-01 04:24:38.586840', 'step': 6208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:38.639126', 'step': 6208, 'epoch': 1} {'type': 'loss', 'content': 0.20379316806793213, 'timestamp': '2025-10-01 04:24:38.647387', 'step': 6209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:38.702349', 'step': 6209, 'epoch': 1} {'type': 'loss', 'content': 0.19038626551628113, 'timestamp': '2025-10-01 04:24:38.704480', 'step': 6210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:38.757568', 'step': 6210, 'epoch': 1} {'type': 'loss', 'content': 0.1899508833885193, 'timestamp': '2025-10-01 04:24:38.759567', 'step': 6211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:38.812490', 'step': 6211, 'epoch': 1} {'type': 'loss', 'content': 0.17394109070301056, 'timestamp': '2025-10-01 04:24:38.818169', 'step': 6212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:38.871036', 'step': 6212, 'epoch': 1} {'type': 'loss', 'content': 0.17011719942092896, 'timestamp': '2025-10-01 04:24:38.873135', 'step': 6213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:38.925901', 'step': 6213, 'epoch': 1} {'type': 'loss', 'content': 0.12710171937942505, 'timestamp': '2025-10-01 04:24:38.928234', 'step': 6214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:38.988347', 'step': 6214, 'epoch': 1} {'type': 'loss', 'content': 0.08806294202804565, 'timestamp': '2025-10-01 04:24:38.990356', 'step': 6215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:39.046685', 'step': 6215, 'epoch': 1} {'type': 'loss', 'content': 0.14999310672283173, 'timestamp': '2025-10-01 04:24:39.052795', 'step': 6216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:39.107222', 'step': 6216, 'epoch': 1} {'type': 'loss', 'content': 0.1818792223930359, 'timestamp': '2025-10-01 04:24:39.109884', 'step': 6217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:39.164171', 'step': 6217, 'epoch': 1} {'type': 'loss', 'content': 0.25683924555778503, 'timestamp': '2025-10-01 04:24:39.168930', 'step': 6218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:24:39.223048', 'step': 6218, 'epoch': 1} {'type': 'loss', 'content': 0.3439522385597229, 'timestamp': '2025-10-01 04:24:39.225324', 'step': 6219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:24:39.283518', 'step': 6219, 'epoch': 1} {'type': 'loss', 'content': 0.120976522564888, 'timestamp': '2025-10-01 04:24:39.289913', 'step': 6220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:39.350690', 'step': 6220, 'epoch': 1} {'type': 'loss', 'content': 0.12694872915744781, 'timestamp': '2025-10-01 04:24:39.353186', 'step': 6221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:24:39.407456', 'step': 6221, 'epoch': 1} {'type': 'loss', 'content': 0.15180765092372894, 'timestamp': '2025-10-01 04:24:39.409522', 'step': 6222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:39.467233', 'step': 6222, 'epoch': 1} {'type': 'loss', 'content': 0.09101161360740662, 'timestamp': '2025-10-01 04:24:39.469352', 'step': 6223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:39.525165', 'step': 6223, 'epoch': 1} {'type': 'loss', 'content': 0.1451593041419983, 'timestamp': '2025-10-01 04:24:39.531205', 'step': 6224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:39.584779', 'step': 6224, 'epoch': 1} {'type': 'loss', 'content': 0.1558726727962494, 'timestamp': '2025-10-01 04:24:39.587786', 'step': 6225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:39.647092', 'step': 6225, 'epoch': 1} {'type': 'loss', 'content': 0.1507461667060852, 'timestamp': '2025-10-01 04:24:39.649634', 'step': 6226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:39.704419', 'step': 6226, 'epoch': 1} {'type': 'loss', 'content': 0.21064092218875885, 'timestamp': '2025-10-01 04:24:39.707046', 'step': 6227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:39.760994', 'step': 6227, 'epoch': 1} {'type': 'loss', 'content': 0.2837997078895569, 'timestamp': '2025-10-01 04:24:39.767258', 'step': 6228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:39.820375', 'step': 6228, 'epoch': 1} {'type': 'loss', 'content': 0.12123414129018784, 'timestamp': '2025-10-01 04:24:39.822630', 'step': 6229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:39.876151', 'step': 6229, 'epoch': 1} {'type': 'loss', 'content': 0.13800284266471863, 'timestamp': '2025-10-01 04:24:39.878598', 'step': 6230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:39.932351', 'step': 6230, 'epoch': 1} {'type': 'loss', 'content': 0.12420696765184402, 'timestamp': '2025-10-01 04:24:39.934780', 'step': 6231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:39.993474', 'step': 6231, 'epoch': 1} {'type': 'loss', 'content': 0.2501113712787628, 'timestamp': '2025-10-01 04:24:39.999519', 'step': 6232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:40.052868', 'step': 6232, 'epoch': 1} {'type': 'loss', 'content': 0.11675356328487396, 'timestamp': '2025-10-01 04:24:40.055286', 'step': 6233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:40.108978', 'step': 6233, 'epoch': 1} {'type': 'loss', 'content': 0.14690788090229034, 'timestamp': '2025-10-01 04:24:40.111220', 'step': 6234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:40.165863', 'step': 6234, 'epoch': 1} {'type': 'loss', 'content': 0.17801471054553986, 'timestamp': '2025-10-01 04:24:40.167986', 'step': 6235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:40.221913', 'step': 6235, 'epoch': 1} {'type': 'loss', 'content': 0.1359177678823471, 'timestamp': '2025-10-01 04:24:40.228382', 'step': 6236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:40.282257', 'step': 6236, 'epoch': 1} {'type': 'loss', 'content': 0.13570734858512878, 'timestamp': '2025-10-01 04:24:40.284504', 'step': 6237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:40.339398', 'step': 6237, 'epoch': 1} {'type': 'loss', 'content': 0.09045889973640442, 'timestamp': '2025-10-01 04:24:40.347811', 'step': 6238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:40.401740', 'step': 6238, 'epoch': 1} {'type': 'loss', 'content': 0.2520172595977783, 'timestamp': '2025-10-01 04:24:40.404211', 'step': 6239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:40.458292', 'step': 6239, 'epoch': 1} {'type': 'loss', 'content': 0.18660585582256317, 'timestamp': '2025-10-01 04:24:40.464832', 'step': 6240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:40.519616', 'step': 6240, 'epoch': 1} {'type': 'loss', 'content': 0.15652622282505035, 'timestamp': '2025-10-01 04:24:40.522939', 'step': 6241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:40.582487', 'step': 6241, 'epoch': 1} {'type': 'loss', 'content': 0.23331497609615326, 'timestamp': '2025-10-01 04:24:40.585395', 'step': 6242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:40.644335', 'step': 6242, 'epoch': 1} {'type': 'loss', 'content': 0.15325544774532318, 'timestamp': '2025-10-01 04:24:40.646795', 'step': 6243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:40.703982', 'step': 6243, 'epoch': 1} {'type': 'loss', 'content': 0.17924590408802032, 'timestamp': '2025-10-01 04:24:40.711120', 'step': 6244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:40.769276', 'step': 6244, 'epoch': 1} {'type': 'loss', 'content': 0.081428162753582, 'timestamp': '2025-10-01 04:24:40.771875', 'step': 6245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:40.831461', 'step': 6245, 'epoch': 1} {'type': 'loss', 'content': 0.12233071029186249, 'timestamp': '2025-10-01 04:24:40.833795', 'step': 6246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:40.892846', 'step': 6246, 'epoch': 1} {'type': 'loss', 'content': 0.23518888652324677, 'timestamp': '2025-10-01 04:24:40.895788', 'step': 6247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:40.954616', 'step': 6247, 'epoch': 1} {'type': 'loss', 'content': 0.13240405917167664, 'timestamp': '2025-10-01 04:24:40.961276', 'step': 6248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:41.020647', 'step': 6248, 'epoch': 1} {'type': 'loss', 'content': 0.14045897126197815, 'timestamp': '2025-10-01 04:24:41.022983', 'step': 6249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:41.081519', 'step': 6249, 'epoch': 1} {'type': 'loss', 'content': 0.18547625839710236, 'timestamp': '2025-10-01 04:24:41.083805', 'step': 6250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:41.143493', 'step': 6250, 'epoch': 1} {'type': 'loss', 'content': 0.13196076452732086, 'timestamp': '2025-10-01 04:24:41.145837', 'step': 6251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:41.204288', 'step': 6251, 'epoch': 1} {'type': 'loss', 'content': 0.11596719175577164, 'timestamp': '2025-10-01 04:24:41.211534', 'step': 6252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:41.269596', 'step': 6252, 'epoch': 1} {'type': 'loss', 'content': 0.09602746367454529, 'timestamp': '2025-10-01 04:24:41.272040', 'step': 6253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:41.336624', 'step': 6253, 'epoch': 1} {'type': 'loss', 'content': 0.23008504509925842, 'timestamp': '2025-10-01 04:24:41.339438', 'step': 6254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:41.402659', 'step': 6254, 'epoch': 1} {'type': 'loss', 'content': 0.2911309599876404, 'timestamp': '2025-10-01 04:24:41.405056', 'step': 6255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:24:41.464312', 'step': 6255, 'epoch': 1} {'type': 'loss', 'content': 0.12766893208026886, 'timestamp': '2025-10-01 04:24:41.471206', 'step': 6256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:41.533527', 'step': 6256, 'epoch': 1} {'type': 'loss', 'content': 0.12618358433246613, 'timestamp': '2025-10-01 04:24:41.535817', 'step': 6257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:41.593787', 'step': 6257, 'epoch': 1} {'type': 'loss', 'content': 0.15131919085979462, 'timestamp': '2025-10-01 04:24:41.595980', 'step': 6258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:24:41.653137', 'step': 6258, 'epoch': 1} {'type': 'loss', 'content': 0.21632103621959686, 'timestamp': '2025-10-01 04:24:41.655513', 'step': 6259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:41.714762', 'step': 6259, 'epoch': 1} {'type': 'loss', 'content': 0.18274261057376862, 'timestamp': '2025-10-01 04:24:41.721297', 'step': 6260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:41.777890', 'step': 6260, 'epoch': 1} {'type': 'loss', 'content': 0.08289428055286407, 'timestamp': '2025-10-01 04:24:41.781533', 'step': 6261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:41.845831', 'step': 6261, 'epoch': 1} {'type': 'loss', 'content': 0.08587256819009781, 'timestamp': '2025-10-01 04:24:41.848132', 'step': 6262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:41.914744', 'step': 6262, 'epoch': 1} {'type': 'loss', 'content': 0.1856643408536911, 'timestamp': '2025-10-01 04:24:41.917018', 'step': 6263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:41.983858', 'step': 6263, 'epoch': 1} {'type': 'loss', 'content': 0.1592150181531906, 'timestamp': '2025-10-01 04:24:41.990619', 'step': 6264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:42.048885', 'step': 6264, 'epoch': 1} {'type': 'loss', 'content': 0.13788913190364838, 'timestamp': '2025-10-01 04:24:42.051193', 'step': 6265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:42.108334', 'step': 6265, 'epoch': 1} {'type': 'loss', 'content': 0.1933397650718689, 'timestamp': '2025-10-01 04:24:42.110403', 'step': 6266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:42.164835', 'step': 6266, 'epoch': 1} {'type': 'loss', 'content': 0.13316147029399872, 'timestamp': '2025-10-01 04:24:42.167091', 'step': 6267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:42.220566', 'step': 6267, 'epoch': 1} {'type': 'loss', 'content': 0.15078386664390564, 'timestamp': '2025-10-01 04:24:42.227207', 'step': 6268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:42.280123', 'step': 6268, 'epoch': 1} {'type': 'loss', 'content': 0.18207892775535583, 'timestamp': '2025-10-01 04:24:42.289070', 'step': 6269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:42.347843', 'step': 6269, 'epoch': 1} {'type': 'loss', 'content': 0.19420386850833893, 'timestamp': '2025-10-01 04:24:42.350219', 'step': 6270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:42.403925', 'step': 6270, 'epoch': 1} {'type': 'loss', 'content': 0.20768989622592926, 'timestamp': '2025-10-01 04:24:42.406073', 'step': 6271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:42.459196', 'step': 6271, 'epoch': 1} {'type': 'loss', 'content': 0.14322267472743988, 'timestamp': '2025-10-01 04:24:42.465045', 'step': 6272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:42.517531', 'step': 6272, 'epoch': 1} {'type': 'loss', 'content': 0.12766875326633453, 'timestamp': '2025-10-01 04:24:42.519522', 'step': 6273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:42.573012', 'step': 6273, 'epoch': 1} {'type': 'loss', 'content': 0.1661895513534546, 'timestamp': '2025-10-01 04:24:42.575129', 'step': 6274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:42.627737', 'step': 6274, 'epoch': 1} {'type': 'loss', 'content': 0.1518363058567047, 'timestamp': '2025-10-01 04:24:42.629820', 'step': 6275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:42.683124', 'step': 6275, 'epoch': 1} {'type': 'loss', 'content': 0.2351778745651245, 'timestamp': '2025-10-01 04:24:42.688972', 'step': 6276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:42.741979', 'step': 6276, 'epoch': 1} {'type': 'loss', 'content': 0.1170576736330986, 'timestamp': '2025-10-01 04:24:42.743965', 'step': 6277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:42.796599', 'step': 6277, 'epoch': 1} {'type': 'loss', 'content': 0.1955411732196808, 'timestamp': '2025-10-01 04:24:42.798819', 'step': 6278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:42.851536', 'step': 6278, 'epoch': 1} {'type': 'loss', 'content': 0.20460158586502075, 'timestamp': '2025-10-01 04:24:42.853668', 'step': 6279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:24:42.907908', 'step': 6279, 'epoch': 1} {'type': 'loss', 'content': 0.12895916402339935, 'timestamp': '2025-10-01 04:24:42.913780', 'step': 6280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:42.966867', 'step': 6280, 'epoch': 1} {'type': 'loss', 'content': 0.18188275396823883, 'timestamp': '2025-10-01 04:24:42.969329', 'step': 6281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:43.023069', 'step': 6281, 'epoch': 1} {'type': 'loss', 'content': 0.18597187101840973, 'timestamp': '2025-10-01 04:24:43.025145', 'step': 6282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:43.078293', 'step': 6282, 'epoch': 1} {'type': 'loss', 'content': 0.2564936578273773, 'timestamp': '2025-10-01 04:24:43.080644', 'step': 6283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:43.133200', 'step': 6283, 'epoch': 1} {'type': 'loss', 'content': 0.14584147930145264, 'timestamp': '2025-10-01 04:24:43.139125', 'step': 6284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:43.191929', 'step': 6284, 'epoch': 1} {'type': 'loss', 'content': 0.17844900488853455, 'timestamp': '2025-10-01 04:24:43.194102', 'step': 6285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:43.247102', 'step': 6285, 'epoch': 1} {'type': 'loss', 'content': 0.17096245288848877, 'timestamp': '2025-10-01 04:24:43.249033', 'step': 6286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:43.305192', 'step': 6286, 'epoch': 1} {'type': 'loss', 'content': 0.16676965355873108, 'timestamp': '2025-10-01 04:24:43.307386', 'step': 6287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:43.361253', 'step': 6287, 'epoch': 1} {'type': 'loss', 'content': 0.24069948494434357, 'timestamp': '2025-10-01 04:24:43.367044', 'step': 6288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:43.419569', 'step': 6288, 'epoch': 1} {'type': 'loss', 'content': 0.18841280043125153, 'timestamp': '2025-10-01 04:24:43.421955', 'step': 6289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:43.475470', 'step': 6289, 'epoch': 1} {'type': 'loss', 'content': 0.15530776977539062, 'timestamp': '2025-10-01 04:24:43.477569', 'step': 6290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:43.531221', 'step': 6290, 'epoch': 1} {'type': 'loss', 'content': 0.22844256460666656, 'timestamp': '2025-10-01 04:24:43.533390', 'step': 6291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:43.607503', 'step': 6291, 'epoch': 1} {'type': 'loss', 'content': 0.2339978814125061, 'timestamp': '2025-10-01 04:24:43.613288', 'step': 6292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:24:43.665504', 'step': 6292, 'epoch': 1} {'type': 'loss', 'content': 0.12750600278377533, 'timestamp': '2025-10-01 04:24:43.667770', 'step': 6293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:43.721837', 'step': 6293, 'epoch': 1} {'type': 'loss', 'content': 0.3589765727519989, 'timestamp': '2025-10-01 04:24:43.724160', 'step': 6294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:43.781494', 'step': 6294, 'epoch': 1} {'type': 'loss', 'content': 0.19581320881843567, 'timestamp': '2025-10-01 04:24:43.784133', 'step': 6295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:43.837426', 'step': 6295, 'epoch': 1} {'type': 'loss', 'content': 0.12276050448417664, 'timestamp': '2025-10-01 04:24:43.843435', 'step': 6296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:43.902313', 'step': 6296, 'epoch': 1} {'type': 'loss', 'content': 0.1558598279953003, 'timestamp': '2025-10-01 04:24:43.904455', 'step': 6297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:43.957486', 'step': 6297, 'epoch': 1} {'type': 'loss', 'content': 0.1757327914237976, 'timestamp': '2025-10-01 04:24:43.960268', 'step': 6298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:44.014447', 'step': 6298, 'epoch': 1} {'type': 'loss', 'content': 0.16167883574962616, 'timestamp': '2025-10-01 04:24:44.017004', 'step': 6299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:44.071372', 'step': 6299, 'epoch': 1} {'type': 'loss', 'content': 0.21266421675682068, 'timestamp': '2025-10-01 04:24:44.094947', 'step': 6300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:44.162966', 'step': 6300, 'epoch': 1} {'type': 'loss', 'content': 0.20530149340629578, 'timestamp': '2025-10-01 04:24:44.165743', 'step': 6301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:44.239005', 'step': 6301, 'epoch': 1} {'type': 'loss', 'content': 0.15237854421138763, 'timestamp': '2025-10-01 04:24:44.248569', 'step': 6302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:44.326480', 'step': 6302, 'epoch': 1} {'type': 'loss', 'content': 0.11993253976106644, 'timestamp': '2025-10-01 04:24:44.334191', 'step': 6303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:44.420734', 'step': 6303, 'epoch': 1} {'type': 'loss', 'content': 0.15606684982776642, 'timestamp': '2025-10-01 04:24:44.427232', 'step': 6304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:24:44.503949', 'step': 6304, 'epoch': 1} {'type': 'loss', 'content': 0.12296552211046219, 'timestamp': '2025-10-01 04:24:44.510358', 'step': 6305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:44.568325', 'step': 6305, 'epoch': 1} {'type': 'loss', 'content': 0.14535613358020782, 'timestamp': '2025-10-01 04:24:44.579597', 'step': 6306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:44.656287', 'step': 6306, 'epoch': 1} {'type': 'loss', 'content': 0.19410063326358795, 'timestamp': '2025-10-01 04:24:44.663446', 'step': 6307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:24:44.736314', 'step': 6307, 'epoch': 1} {'type': 'loss', 'content': 0.09007048606872559, 'timestamp': '2025-10-01 04:24:44.762773', 'step': 6308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:44.834386', 'step': 6308, 'epoch': 1} {'type': 'loss', 'content': 0.11097913980484009, 'timestamp': '2025-10-01 04:24:44.840841', 'step': 6309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:44.896399', 'step': 6309, 'epoch': 1} {'type': 'loss', 'content': 0.13655482232570648, 'timestamp': '2025-10-01 04:24:44.902057', 'step': 6310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:44.977786', 'step': 6310, 'epoch': 1} {'type': 'loss', 'content': 0.14359423518180847, 'timestamp': '2025-10-01 04:24:44.980836', 'step': 6311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:45.067075', 'step': 6311, 'epoch': 1} {'type': 'loss', 'content': 0.08209271728992462, 'timestamp': '2025-10-01 04:24:45.077675', 'step': 6312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:45.150235', 'step': 6312, 'epoch': 1} {'type': 'loss', 'content': 0.1498657464981079, 'timestamp': '2025-10-01 04:24:45.152716', 'step': 6313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:45.210882', 'step': 6313, 'epoch': 1} {'type': 'loss', 'content': 0.13262034952640533, 'timestamp': '2025-10-01 04:24:45.215998', 'step': 6314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:45.277272', 'step': 6314, 'epoch': 1} {'type': 'loss', 'content': 0.2549600899219513, 'timestamp': '2025-10-01 04:24:45.294345', 'step': 6315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:45.361057', 'step': 6315, 'epoch': 1} {'type': 'loss', 'content': 0.19090767204761505, 'timestamp': '2025-10-01 04:24:45.366576', 'step': 6316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:45.418966', 'step': 6316, 'epoch': 1} {'type': 'loss', 'content': 0.19886742532253265, 'timestamp': '2025-10-01 04:24:45.423682', 'step': 6317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:45.485366', 'step': 6317, 'epoch': 1} {'type': 'loss', 'content': 0.3260123133659363, 'timestamp': '2025-10-01 04:24:45.489103', 'step': 6318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:45.545187', 'step': 6318, 'epoch': 1} {'type': 'loss', 'content': 0.30531182885169983, 'timestamp': '2025-10-01 04:24:45.547092', 'step': 6319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:45.599679', 'step': 6319, 'epoch': 1} {'type': 'loss', 'content': 0.1462804079055786, 'timestamp': '2025-10-01 04:24:45.605230', 'step': 6320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:45.658543', 'step': 6320, 'epoch': 1} {'type': 'loss', 'content': 0.16780176758766174, 'timestamp': '2025-10-01 04:24:45.660565', 'step': 6321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:45.714414', 'step': 6321, 'epoch': 1} {'type': 'loss', 'content': 0.21801091730594635, 'timestamp': '2025-10-01 04:24:45.716461', 'step': 6322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:45.774160', 'step': 6322, 'epoch': 1} {'type': 'loss', 'content': 0.1296454817056656, 'timestamp': '2025-10-01 04:24:45.776300', 'step': 6323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:45.829498', 'step': 6323, 'epoch': 1} {'type': 'loss', 'content': 0.1276586949825287, 'timestamp': '2025-10-01 04:24:45.836495', 'step': 6324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:45.891718', 'step': 6324, 'epoch': 1} {'type': 'loss', 'content': 0.1502874344587326, 'timestamp': '2025-10-01 04:24:45.894246', 'step': 6325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:45.947790', 'step': 6325, 'epoch': 1} {'type': 'loss', 'content': 0.1240491047501564, 'timestamp': '2025-10-01 04:24:45.950379', 'step': 6326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:46.003320', 'step': 6326, 'epoch': 1} {'type': 'loss', 'content': 0.09997498989105225, 'timestamp': '2025-10-01 04:24:46.005499', 'step': 6327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:46.059194', 'step': 6327, 'epoch': 1} {'type': 'loss', 'content': 0.1586751937866211, 'timestamp': '2025-10-01 04:24:46.064793', 'step': 6328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:46.117351', 'step': 6328, 'epoch': 1} {'type': 'loss', 'content': 0.12551268935203552, 'timestamp': '2025-10-01 04:24:46.119345', 'step': 6329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:46.172542', 'step': 6329, 'epoch': 1} {'type': 'loss', 'content': 0.14091329276561737, 'timestamp': '2025-10-01 04:24:46.174472', 'step': 6330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:46.228364', 'step': 6330, 'epoch': 1} {'type': 'loss', 'content': 0.1570301204919815, 'timestamp': '2025-10-01 04:24:46.230993', 'step': 6331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:46.294797', 'step': 6331, 'epoch': 1} {'type': 'loss', 'content': 0.11999616026878357, 'timestamp': '2025-10-01 04:24:46.300476', 'step': 6332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:24:46.352880', 'step': 6332, 'epoch': 1} {'type': 'loss', 'content': 0.1181957945227623, 'timestamp': '2025-10-01 04:24:46.355558', 'step': 6333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:46.409254', 'step': 6333, 'epoch': 1} {'type': 'loss', 'content': 0.1424989402294159, 'timestamp': '2025-10-01 04:24:46.412585', 'step': 6334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:46.466782', 'step': 6334, 'epoch': 1} {'type': 'loss', 'content': 0.12270479649305344, 'timestamp': '2025-10-01 04:24:46.468880', 'step': 6335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:46.521835', 'step': 6335, 'epoch': 1} {'type': 'loss', 'content': 0.20007053017616272, 'timestamp': '2025-10-01 04:24:46.527282', 'step': 6336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:46.580354', 'step': 6336, 'epoch': 1} {'type': 'loss', 'content': 0.13619166612625122, 'timestamp': '2025-10-01 04:24:46.587677', 'step': 6337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:46.640219', 'step': 6337, 'epoch': 1} {'type': 'loss', 'content': 0.1620253324508667, 'timestamp': '2025-10-01 04:24:46.643519', 'step': 6338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:46.696160', 'step': 6338, 'epoch': 1} {'type': 'loss', 'content': 0.19805297255516052, 'timestamp': '2025-10-01 04:24:46.698228', 'step': 6339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:46.751269', 'step': 6339, 'epoch': 1} {'type': 'loss', 'content': 0.1393618881702423, 'timestamp': '2025-10-01 04:24:46.757168', 'step': 6340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:46.810020', 'step': 6340, 'epoch': 1} {'type': 'loss', 'content': 0.12557785212993622, 'timestamp': '2025-10-01 04:24:46.812161', 'step': 6341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:46.866017', 'step': 6341, 'epoch': 1} {'type': 'loss', 'content': 0.18962837755680084, 'timestamp': '2025-10-01 04:24:46.868283', 'step': 6342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:46.922143', 'step': 6342, 'epoch': 1} {'type': 'loss', 'content': 0.22830693423748016, 'timestamp': '2025-10-01 04:24:46.924315', 'step': 6343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:46.990155', 'step': 6343, 'epoch': 1} {'type': 'loss', 'content': 0.15091601014137268, 'timestamp': '2025-10-01 04:24:46.996001', 'step': 6344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:47.048317', 'step': 6344, 'epoch': 1} {'type': 'loss', 'content': 0.22734498977661133, 'timestamp': '2025-10-01 04:24:47.050248', 'step': 6345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:47.103617', 'step': 6345, 'epoch': 1} {'type': 'loss', 'content': 0.11615975946187973, 'timestamp': '2025-10-01 04:24:47.105609', 'step': 6346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:47.159398', 'step': 6346, 'epoch': 1} {'type': 'loss', 'content': 0.1272045075893402, 'timestamp': '2025-10-01 04:24:47.161419', 'step': 6347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:47.213970', 'step': 6347, 'epoch': 1} {'type': 'loss', 'content': 0.12617768347263336, 'timestamp': '2025-10-01 04:24:47.221556', 'step': 6348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:47.275843', 'step': 6348, 'epoch': 1} {'type': 'loss', 'content': 0.15523555874824524, 'timestamp': '2025-10-01 04:24:47.278125', 'step': 6349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:47.331673', 'step': 6349, 'epoch': 1} {'type': 'loss', 'content': 0.18044789135456085, 'timestamp': '2025-10-01 04:24:47.333630', 'step': 6350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:47.386145', 'step': 6350, 'epoch': 1} {'type': 'loss', 'content': 0.17440839111804962, 'timestamp': '2025-10-01 04:24:47.388556', 'step': 6351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:24:47.441878', 'step': 6351, 'epoch': 1} {'type': 'loss', 'content': 0.15519963204860687, 'timestamp': '2025-10-01 04:24:47.447472', 'step': 6352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:47.500860', 'step': 6352, 'epoch': 1} {'type': 'loss', 'content': 0.13900311291217804, 'timestamp': '2025-10-01 04:24:47.503242', 'step': 6353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:47.556694', 'step': 6353, 'epoch': 1} {'type': 'loss', 'content': 0.13571707904338837, 'timestamp': '2025-10-01 04:24:47.559168', 'step': 6354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:47.613873', 'step': 6354, 'epoch': 1} {'type': 'loss', 'content': 0.19502826035022736, 'timestamp': '2025-10-01 04:24:47.616130', 'step': 6355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:47.678119', 'step': 6355, 'epoch': 1} {'type': 'loss', 'content': 0.1543663889169693, 'timestamp': '2025-10-01 04:24:47.684274', 'step': 6356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:24:47.737984', 'step': 6356, 'epoch': 1} {'type': 'loss', 'content': 0.14841428399085999, 'timestamp': '2025-10-01 04:24:47.740602', 'step': 6357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:47.794691', 'step': 6357, 'epoch': 1} {'type': 'loss', 'content': 0.14459896087646484, 'timestamp': '2025-10-01 04:24:47.797166', 'step': 6358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:47.850823', 'step': 6358, 'epoch': 1} {'type': 'loss', 'content': 0.13285250961780548, 'timestamp': '2025-10-01 04:24:47.863055', 'step': 6359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:47.925192', 'step': 6359, 'epoch': 1} {'type': 'loss', 'content': 0.2736632823944092, 'timestamp': '2025-10-01 04:24:47.931174', 'step': 6360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:47.984527', 'step': 6360, 'epoch': 1} {'type': 'loss', 'content': 0.11000166088342667, 'timestamp': '2025-10-01 04:24:47.986728', 'step': 6361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:48.040464', 'step': 6361, 'epoch': 1} {'type': 'loss', 'content': 0.11087815463542938, 'timestamp': '2025-10-01 04:24:48.042689', 'step': 6362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:48.097795', 'step': 6362, 'epoch': 1} {'type': 'loss', 'content': 0.11438465863466263, 'timestamp': '2025-10-01 04:24:48.100101', 'step': 6363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:48.154411', 'step': 6363, 'epoch': 1} {'type': 'loss', 'content': 0.2328079789876938, 'timestamp': '2025-10-01 04:24:48.160603', 'step': 6364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:48.214467', 'step': 6364, 'epoch': 1} {'type': 'loss', 'content': 0.19253574311733246, 'timestamp': '2025-10-01 04:24:48.216924', 'step': 6365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:48.270656', 'step': 6365, 'epoch': 1} {'type': 'loss', 'content': 0.13267706334590912, 'timestamp': '2025-10-01 04:24:48.273473', 'step': 6366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:48.327556', 'step': 6366, 'epoch': 1} {'type': 'loss', 'content': 0.10128379613161087, 'timestamp': '2025-10-01 04:24:48.330266', 'step': 6367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:48.384336', 'step': 6367, 'epoch': 1} {'type': 'loss', 'content': 0.14507344365119934, 'timestamp': '2025-10-01 04:24:48.390842', 'step': 6368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:24:48.444714', 'step': 6368, 'epoch': 1} {'type': 'loss', 'content': 0.16132886707782745, 'timestamp': '2025-10-01 04:24:48.447153', 'step': 6369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:48.501119', 'step': 6369, 'epoch': 1} {'type': 'loss', 'content': 0.1808852106332779, 'timestamp': '2025-10-01 04:24:48.503279', 'step': 6370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:48.558483', 'step': 6370, 'epoch': 1} {'type': 'loss', 'content': 0.15441659092903137, 'timestamp': '2025-10-01 04:24:48.560998', 'step': 6371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:48.614604', 'step': 6371, 'epoch': 1} {'type': 'loss', 'content': 0.08677338808774948, 'timestamp': '2025-10-01 04:24:48.620835', 'step': 6372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:48.675128', 'step': 6372, 'epoch': 1} {'type': 'loss', 'content': 0.16475479304790497, 'timestamp': '2025-10-01 04:24:48.678218', 'step': 6373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:48.731974', 'step': 6373, 'epoch': 1} {'type': 'loss', 'content': 0.10030699521303177, 'timestamp': '2025-10-01 04:24:48.734662', 'step': 6374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:48.788814', 'step': 6374, 'epoch': 1} {'type': 'loss', 'content': 0.16471245884895325, 'timestamp': '2025-10-01 04:24:48.793724', 'step': 6375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:48.859166', 'step': 6375, 'epoch': 1} {'type': 'loss', 'content': 0.21874241530895233, 'timestamp': '2025-10-01 04:24:48.874132', 'step': 6376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:48.926994', 'step': 6376, 'epoch': 1} {'type': 'loss', 'content': 0.2054988145828247, 'timestamp': '2025-10-01 04:24:48.929173', 'step': 6377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:48.982432', 'step': 6377, 'epoch': 1} {'type': 'loss', 'content': 0.1180645003914833, 'timestamp': '2025-10-01 04:24:48.993314', 'step': 6378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:24:49.047495', 'step': 6378, 'epoch': 1} {'type': 'loss', 'content': 0.19235265254974365, 'timestamp': '2025-10-01 04:24:49.049662', 'step': 6379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:49.103150', 'step': 6379, 'epoch': 1} {'type': 'loss', 'content': 0.20982185006141663, 'timestamp': '2025-10-01 04:24:49.110997', 'step': 6380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:49.163704', 'step': 6380, 'epoch': 1} {'type': 'loss', 'content': 0.1010090783238411, 'timestamp': '2025-10-01 04:24:49.173664', 'step': 6381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:24:49.228059', 'step': 6381, 'epoch': 1} {'type': 'loss', 'content': 0.16870050132274628, 'timestamp': '2025-10-01 04:24:49.230868', 'step': 6382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:49.285199', 'step': 6382, 'epoch': 1} {'type': 'loss', 'content': 0.14741158485412598, 'timestamp': '2025-10-01 04:24:49.287658', 'step': 6383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:49.342302', 'step': 6383, 'epoch': 1} {'type': 'loss', 'content': 0.11254352331161499, 'timestamp': '2025-10-01 04:24:49.347958', 'step': 6384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:49.419983', 'step': 6384, 'epoch': 1} {'type': 'loss', 'content': 0.19616363942623138, 'timestamp': '2025-10-01 04:24:49.422503', 'step': 6385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:49.476014', 'step': 6385, 'epoch': 1} {'type': 'loss', 'content': 0.21942000091075897, 'timestamp': '2025-10-01 04:24:49.478370', 'step': 6386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:49.532420', 'step': 6386, 'epoch': 1} {'type': 'loss', 'content': 0.22065933048725128, 'timestamp': '2025-10-01 04:24:49.534440', 'step': 6387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:49.587183', 'step': 6387, 'epoch': 1} {'type': 'loss', 'content': 0.19369067251682281, 'timestamp': '2025-10-01 04:24:49.593682', 'step': 6388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:49.645967', 'step': 6388, 'epoch': 1} {'type': 'loss', 'content': 0.16536828875541687, 'timestamp': '2025-10-01 04:24:49.648153', 'step': 6389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:49.701198', 'step': 6389, 'epoch': 1} {'type': 'loss', 'content': 0.16537947952747345, 'timestamp': '2025-10-01 04:24:49.703501', 'step': 6390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:49.756682', 'step': 6390, 'epoch': 1} {'type': 'loss', 'content': 0.1172284483909607, 'timestamp': '2025-10-01 04:24:49.758578', 'step': 6391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:49.811357', 'step': 6391, 'epoch': 1} {'type': 'loss', 'content': 0.22677874565124512, 'timestamp': '2025-10-01 04:24:49.817101', 'step': 6392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:49.869094', 'step': 6392, 'epoch': 1} {'type': 'loss', 'content': 0.1445273458957672, 'timestamp': '2025-10-01 04:24:49.870924', 'step': 6393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:49.924337', 'step': 6393, 'epoch': 1} {'type': 'loss', 'content': 0.17128682136535645, 'timestamp': '2025-10-01 04:24:49.927524', 'step': 6394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:49.980433', 'step': 6394, 'epoch': 1} {'type': 'loss', 'content': 0.185492604970932, 'timestamp': '2025-10-01 04:24:49.982800', 'step': 6395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:50.035724', 'step': 6395, 'epoch': 1} {'type': 'loss', 'content': 0.22685940563678741, 'timestamp': '2025-10-01 04:24:50.041578', 'step': 6396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:50.095110', 'step': 6396, 'epoch': 1} {'type': 'loss', 'content': 0.21879826486110687, 'timestamp': '2025-10-01 04:24:50.097092', 'step': 6397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:50.157749', 'step': 6397, 'epoch': 1} {'type': 'loss', 'content': 0.18115171790122986, 'timestamp': '2025-10-01 04:24:50.159791', 'step': 6398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:50.213017', 'step': 6398, 'epoch': 1} {'type': 'loss', 'content': 0.1816251128911972, 'timestamp': '2025-10-01 04:24:50.215465', 'step': 6399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:50.276418', 'step': 6399, 'epoch': 1} {'type': 'loss', 'content': 0.10879505425691605, 'timestamp': '2025-10-01 04:24:50.282872', 'step': 6400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:50.335389', 'step': 6400, 'epoch': 1} {'type': 'loss', 'content': 0.21657614409923553, 'timestamp': '2025-10-01 04:24:50.337542', 'step': 6401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:50.390877', 'step': 6401, 'epoch': 1} {'type': 'loss', 'content': 0.1877107322216034, 'timestamp': '2025-10-01 04:24:50.393153', 'step': 6402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:50.446588', 'step': 6402, 'epoch': 1} {'type': 'loss', 'content': 0.19936251640319824, 'timestamp': '2025-10-01 04:24:50.448908', 'step': 6403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:50.501310', 'step': 6403, 'epoch': 1} {'type': 'loss', 'content': 0.13643676042556763, 'timestamp': '2025-10-01 04:24:50.507012', 'step': 6404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:50.560037', 'step': 6404, 'epoch': 1} {'type': 'loss', 'content': 0.20800098776817322, 'timestamp': '2025-10-01 04:24:50.561934', 'step': 6405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:50.614852', 'step': 6405, 'epoch': 1} {'type': 'loss', 'content': 0.09137385338544846, 'timestamp': '2025-10-01 04:24:50.617166', 'step': 6406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:24:50.670519', 'step': 6406, 'epoch': 1} {'type': 'loss', 'content': 0.20058473944664001, 'timestamp': '2025-10-01 04:24:50.672596', 'step': 6407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:50.725748', 'step': 6407, 'epoch': 1} {'type': 'loss', 'content': 0.12353072315454483, 'timestamp': '2025-10-01 04:24:50.732180', 'step': 6408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:50.785598', 'step': 6408, 'epoch': 1} {'type': 'loss', 'content': 0.10943061858415604, 'timestamp': '2025-10-01 04:24:50.787752', 'step': 6409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:50.841213', 'step': 6409, 'epoch': 1} {'type': 'loss', 'content': 0.13264349102973938, 'timestamp': '2025-10-01 04:24:50.843370', 'step': 6410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:50.896663', 'step': 6410, 'epoch': 1} {'type': 'loss', 'content': 0.09046045690774918, 'timestamp': '2025-10-01 04:24:50.899016', 'step': 6411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:50.952953', 'step': 6411, 'epoch': 1} {'type': 'loss', 'content': 0.1066431999206543, 'timestamp': '2025-10-01 04:24:50.958514', 'step': 6412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:24:51.013044', 'step': 6412, 'epoch': 1} {'type': 'loss', 'content': 0.1396484524011612, 'timestamp': '2025-10-01 04:24:51.015213', 'step': 6413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:51.070048', 'step': 6413, 'epoch': 1} {'type': 'loss', 'content': 0.2208855152130127, 'timestamp': '2025-10-01 04:24:51.072282', 'step': 6414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:51.125416', 'step': 6414, 'epoch': 1} {'type': 'loss', 'content': 0.14219732582569122, 'timestamp': '2025-10-01 04:24:51.127570', 'step': 6415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:51.180837', 'step': 6415, 'epoch': 1} {'type': 'loss', 'content': 0.14043594896793365, 'timestamp': '2025-10-01 04:24:51.186981', 'step': 6416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:51.239556', 'step': 6416, 'epoch': 1} {'type': 'loss', 'content': 0.10136973112821579, 'timestamp': '2025-10-01 04:24:51.241594', 'step': 6417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:51.294721', 'step': 6417, 'epoch': 1} {'type': 'loss', 'content': 0.08781079202890396, 'timestamp': '2025-10-01 04:24:51.297318', 'step': 6418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:51.349825', 'step': 6418, 'epoch': 1} {'type': 'loss', 'content': 0.17611953616142273, 'timestamp': '2025-10-01 04:24:51.351948', 'step': 6419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:51.405134', 'step': 6419, 'epoch': 1} {'type': 'loss', 'content': 0.1631825715303421, 'timestamp': '2025-10-01 04:24:51.410743', 'step': 6420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:51.462733', 'step': 6420, 'epoch': 1} {'type': 'loss', 'content': 0.15576647222042084, 'timestamp': '2025-10-01 04:24:51.464974', 'step': 6421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:51.518389', 'step': 6421, 'epoch': 1} {'type': 'loss', 'content': 0.27724194526672363, 'timestamp': '2025-10-01 04:24:51.520540', 'step': 6422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:51.573531', 'step': 6422, 'epoch': 1} {'type': 'loss', 'content': 0.11468088626861572, 'timestamp': '2025-10-01 04:24:51.576484', 'step': 6423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:51.640242', 'step': 6423, 'epoch': 1} {'type': 'loss', 'content': 0.10974957048892975, 'timestamp': '2025-10-01 04:24:51.645890', 'step': 6424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:51.699991', 'step': 6424, 'epoch': 1} {'type': 'loss', 'content': 0.05782891809940338, 'timestamp': '2025-10-01 04:24:51.702029', 'step': 6425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:51.754707', 'step': 6425, 'epoch': 1} {'type': 'loss', 'content': 0.08339885622262955, 'timestamp': '2025-10-01 04:24:51.756892', 'step': 6426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:51.810118', 'step': 6426, 'epoch': 1} {'type': 'loss', 'content': 0.1481817364692688, 'timestamp': '2025-10-01 04:24:51.814762', 'step': 6427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:24:51.870362', 'step': 6427, 'epoch': 1} {'type': 'loss', 'content': 0.23074285686016083, 'timestamp': '2025-10-01 04:24:51.876166', 'step': 6428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:51.930648', 'step': 6428, 'epoch': 1} {'type': 'loss', 'content': 0.10901876538991928, 'timestamp': '2025-10-01 04:24:51.938455', 'step': 6429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:24:51.992308', 'step': 6429, 'epoch': 1} {'type': 'loss', 'content': 0.17407718300819397, 'timestamp': '2025-10-01 04:24:51.994578', 'step': 6430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:52.047688', 'step': 6430, 'epoch': 1} {'type': 'loss', 'content': 0.14874312281608582, 'timestamp': '2025-10-01 04:24:52.063520', 'step': 6431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:52.116622', 'step': 6431, 'epoch': 1} {'type': 'loss', 'content': 0.19174344837665558, 'timestamp': '2025-10-01 04:24:52.122268', 'step': 6432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:52.195772', 'step': 6432, 'epoch': 1} {'type': 'loss', 'content': 0.14321637153625488, 'timestamp': '2025-10-01 04:24:52.197788', 'step': 6433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:52.250219', 'step': 6433, 'epoch': 1} {'type': 'loss', 'content': 0.10611701011657715, 'timestamp': '2025-10-01 04:24:52.254065', 'step': 6434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:52.307369', 'step': 6434, 'epoch': 1} {'type': 'loss', 'content': 0.19071342051029205, 'timestamp': '2025-10-01 04:24:52.322576', 'step': 6435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:52.375459', 'step': 6435, 'epoch': 1} {'type': 'loss', 'content': 0.15988130867481232, 'timestamp': '2025-10-01 04:24:52.381142', 'step': 6436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:52.435576', 'step': 6436, 'epoch': 1} {'type': 'loss', 'content': 0.19264574348926544, 'timestamp': '2025-10-01 04:24:52.442579', 'step': 6437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:52.497055', 'step': 6437, 'epoch': 1} {'type': 'loss', 'content': 0.12119001150131226, 'timestamp': '2025-10-01 04:24:52.499077', 'step': 6438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:52.552430', 'step': 6438, 'epoch': 1} {'type': 'loss', 'content': 0.14822819828987122, 'timestamp': '2025-10-01 04:24:52.567051', 'step': 6439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:52.620382', 'step': 6439, 'epoch': 1} {'type': 'loss', 'content': 0.19053512811660767, 'timestamp': '2025-10-01 04:24:52.626612', 'step': 6440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:52.680090', 'step': 6440, 'epoch': 1} {'type': 'loss', 'content': 0.17068761587142944, 'timestamp': '2025-10-01 04:24:52.693160', 'step': 6441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:52.761845', 'step': 6441, 'epoch': 1} {'type': 'loss', 'content': 0.15018530189990997, 'timestamp': '2025-10-01 04:24:52.763902', 'step': 6442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:52.816947', 'step': 6442, 'epoch': 1} {'type': 'loss', 'content': 0.11018826812505722, 'timestamp': '2025-10-01 04:24:52.819182', 'step': 6443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:52.873474', 'step': 6443, 'epoch': 1} {'type': 'loss', 'content': 0.15614406764507294, 'timestamp': '2025-10-01 04:24:52.879457', 'step': 6444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:52.932379', 'step': 6444, 'epoch': 1} {'type': 'loss', 'content': 0.1668100655078888, 'timestamp': '2025-10-01 04:24:52.934499', 'step': 6445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:52.987437', 'step': 6445, 'epoch': 1} {'type': 'loss', 'content': 0.2308671623468399, 'timestamp': '2025-10-01 04:24:52.989537', 'step': 6446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:53.042780', 'step': 6446, 'epoch': 1} {'type': 'loss', 'content': 0.2318643182516098, 'timestamp': '2025-10-01 04:24:53.058936', 'step': 6447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:53.113110', 'step': 6447, 'epoch': 1} {'type': 'loss', 'content': 0.23031175136566162, 'timestamp': '2025-10-01 04:24:53.118830', 'step': 6448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:53.172088', 'step': 6448, 'epoch': 1} {'type': 'loss', 'content': 0.18341457843780518, 'timestamp': '2025-10-01 04:24:53.174319', 'step': 6449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:24:53.241287', 'step': 6449, 'epoch': 1} {'type': 'loss', 'content': 0.10780327767133713, 'timestamp': '2025-10-01 04:24:53.254483', 'step': 6450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:53.317534', 'step': 6450, 'epoch': 1} {'type': 'loss', 'content': 0.17165003716945648, 'timestamp': '2025-10-01 04:24:53.319602', 'step': 6451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:53.374209', 'step': 6451, 'epoch': 1} {'type': 'loss', 'content': 0.14995147287845612, 'timestamp': '2025-10-01 04:24:53.381063', 'step': 6452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:53.445455', 'step': 6452, 'epoch': 1} {'type': 'loss', 'content': 0.13710595667362213, 'timestamp': '2025-10-01 04:24:53.459593', 'step': 6453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:53.514135', 'step': 6453, 'epoch': 1} {'type': 'loss', 'content': 0.13855500519275665, 'timestamp': '2025-10-01 04:24:53.516036', 'step': 6454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:24:53.569583', 'step': 6454, 'epoch': 1} {'type': 'loss', 'content': 0.14184127748012543, 'timestamp': '2025-10-01 04:24:53.573705', 'step': 6455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:24:53.627589', 'step': 6455, 'epoch': 1} {'type': 'loss', 'content': 0.159775972366333, 'timestamp': '2025-10-01 04:24:53.633280', 'step': 6456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:53.685697', 'step': 6456, 'epoch': 1} {'type': 'loss', 'content': 0.15386474132537842, 'timestamp': '2025-10-01 04:24:53.698985', 'step': 6457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:53.755446', 'step': 6457, 'epoch': 1} {'type': 'loss', 'content': 0.08373398333787918, 'timestamp': '2025-10-01 04:24:53.771038', 'step': 6458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:53.825295', 'step': 6458, 'epoch': 1} {'type': 'loss', 'content': 0.17773044109344482, 'timestamp': '2025-10-01 04:24:53.827380', 'step': 6459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:53.881317', 'step': 6459, 'epoch': 1} {'type': 'loss', 'content': 0.1004602462053299, 'timestamp': '2025-10-01 04:24:53.886947', 'step': 6460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:53.941316', 'step': 6460, 'epoch': 1} {'type': 'loss', 'content': 0.17091374099254608, 'timestamp': '2025-10-01 04:24:53.943316', 'step': 6461, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:53.996334', 'step': 6461, 'epoch': 1} {'type': 'loss', 'content': 0.1020553931593895, 'timestamp': '2025-10-01 04:24:53.998563', 'step': 6462, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:54.053072', 'step': 6462, 'epoch': 1} {'type': 'loss', 'content': 0.21573039889335632, 'timestamp': '2025-10-01 04:24:54.055730', 'step': 6463, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:54.108781', 'step': 6463, 'epoch': 1} {'type': 'loss', 'content': 0.16792383790016174, 'timestamp': '2025-10-01 04:24:54.114458', 'step': 6464, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:54.182496', 'step': 6464, 'epoch': 1} {'type': 'loss', 'content': 0.19426901638507843, 'timestamp': '2025-10-01 04:24:54.184775', 'step': 6465, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:54.238286', 'step': 6465, 'epoch': 1} {'type': 'loss', 'content': 0.10596990585327148, 'timestamp': '2025-10-01 04:24:54.240670', 'step': 6466, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:54.294309', 'step': 6466, 'epoch': 1} {'type': 'loss', 'content': 0.11920268833637238, 'timestamp': '2025-10-01 04:24:54.296302', 'step': 6467, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:54.350575', 'step': 6467, 'epoch': 1} {'type': 'loss', 'content': 0.24738825857639313, 'timestamp': '2025-10-01 04:24:54.356464', 'step': 6468, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:54.409051', 'step': 6468, 'epoch': 1} {'type': 'loss', 'content': 0.14692635834217072, 'timestamp': '2025-10-01 04:24:54.411267', 'step': 6469, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:54.465497', 'step': 6469, 'epoch': 1} {'type': 'loss', 'content': 0.09593238681554794, 'timestamp': '2025-10-01 04:24:54.468007', 'step': 6470, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:54.521233', 'step': 6470, 'epoch': 1} {'type': 'loss', 'content': 0.20612049102783203, 'timestamp': '2025-10-01 04:24:54.524459', 'step': 6471, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:54.579285', 'step': 6471, 'epoch': 1} {'type': 'loss', 'content': 0.1045636534690857, 'timestamp': '2025-10-01 04:24:54.585591', 'step': 6472, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:54.652090', 'step': 6472, 'epoch': 1} {'type': 'loss', 'content': 0.1024833470582962, 'timestamp': '2025-10-01 04:24:54.654530', 'step': 6473, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:54.736724', 'step': 6473, 'epoch': 1} {'type': 'loss', 'content': 0.10939175635576248, 'timestamp': '2025-10-01 04:24:54.739786', 'step': 6474, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:54.823434', 'step': 6474, 'epoch': 1} {'type': 'loss', 'content': 0.23221686482429504, 'timestamp': '2025-10-01 04:24:54.826662', 'step': 6475, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:54.880214', 'step': 6475, 'epoch': 1} {'type': 'loss', 'content': 0.1320423036813736, 'timestamp': '2025-10-01 04:24:54.885913', 'step': 6476, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:54.939316', 'step': 6476, 'epoch': 1} {'type': 'loss', 'content': 0.14741086959838867, 'timestamp': '2025-10-01 04:24:54.941938', 'step': 6477, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:54.995539', 'step': 6477, 'epoch': 1} {'type': 'loss', 'content': 0.18387041985988617, 'timestamp': '2025-10-01 04:24:54.997665', 'step': 6478, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:55.050702', 'step': 6478, 'epoch': 1} {'type': 'loss', 'content': 0.20295439660549164, 'timestamp': '2025-10-01 04:24:55.053313', 'step': 6479, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:55.110668', 'step': 6479, 'epoch': 1} {'type': 'loss', 'content': 0.14516563713550568, 'timestamp': '2025-10-01 04:24:55.117194', 'step': 6480, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:55.171573', 'step': 6480, 'epoch': 1} {'type': 'loss', 'content': 0.1800619512796402, 'timestamp': '2025-10-01 04:24:55.174125', 'step': 6481, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:55.231021', 'step': 6481, 'epoch': 1} {'type': 'loss', 'content': 0.13966509699821472, 'timestamp': '2025-10-01 04:24:55.233409', 'step': 6482, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:55.289371', 'step': 6482, 'epoch': 1} {'type': 'loss', 'content': 0.16561482846736908, 'timestamp': '2025-10-01 04:24:55.291695', 'step': 6483, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:55.347480', 'step': 6483, 'epoch': 1} {'type': 'loss', 'content': 0.13915374875068665, 'timestamp': '2025-10-01 04:24:55.354727', 'step': 6484, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:55.420024', 'step': 6484, 'epoch': 1} {'type': 'loss', 'content': 0.17758291959762573, 'timestamp': '2025-10-01 04:24:55.422453', 'step': 6485, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:24:55.479021', 'step': 6485, 'epoch': 1} {'type': 'loss', 'content': 0.13056792318820953, 'timestamp': '2025-10-01 04:24:55.481488', 'step': 6486, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:55.535927', 'step': 6486, 'epoch': 1} {'type': 'loss', 'content': 0.17308829724788666, 'timestamp': '2025-10-01 04:24:55.538824', 'step': 6487, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:24:55.606924', 'step': 6487, 'epoch': 1} {'type': 'loss', 'content': 0.11874375492334366, 'timestamp': '2025-10-01 04:24:55.624244', 'step': 6488, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:55.677747', 'step': 6488, 'epoch': 1} {'type': 'loss', 'content': 0.14867860078811646, 'timestamp': '2025-10-01 04:24:55.680111', 'step': 6489, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:55.734958', 'step': 6489, 'epoch': 1} {'type': 'loss', 'content': 0.15221814811229706, 'timestamp': '2025-10-01 04:24:55.738015', 'step': 6490, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:55.792141', 'step': 6490, 'epoch': 1} {'type': 'loss', 'content': 0.11159002780914307, 'timestamp': '2025-10-01 04:24:55.794449', 'step': 6491, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:55.874101', 'step': 6491, 'epoch': 1} {'type': 'loss', 'content': 0.1496618241071701, 'timestamp': '2025-10-01 04:24:55.880283', 'step': 6492, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:55.933216', 'step': 6492, 'epoch': 1} {'type': 'loss', 'content': 0.20512528717517853, 'timestamp': '2025-10-01 04:24:55.936329', 'step': 6493, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:55.991344', 'step': 6493, 'epoch': 1} {'type': 'loss', 'content': 0.1525564044713974, 'timestamp': '2025-10-01 04:24:55.993928', 'step': 6494, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:56.048138', 'step': 6494, 'epoch': 1} {'type': 'loss', 'content': 0.14455409348011017, 'timestamp': '2025-10-01 04:24:56.050628', 'step': 6495, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:56.106118', 'step': 6495, 'epoch': 1} {'type': 'loss', 'content': 0.1447121649980545, 'timestamp': '2025-10-01 04:24:56.112521', 'step': 6496, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:56.167728', 'step': 6496, 'epoch': 1} {'type': 'loss', 'content': 0.14720109105110168, 'timestamp': '2025-10-01 04:24:56.176200', 'step': 6497, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:56.240492', 'step': 6497, 'epoch': 1} {'type': 'loss', 'content': 0.15267004072666168, 'timestamp': '2025-10-01 04:24:56.245104', 'step': 6498, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:56.301835', 'step': 6498, 'epoch': 1} {'type': 'loss', 'content': 0.1999732404947281, 'timestamp': '2025-10-01 04:24:56.307214', 'step': 6499, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:56.367278', 'step': 6499, 'epoch': 1} {'type': 'loss', 'content': 0.2736956775188446, 'timestamp': '2025-10-01 04:24:56.373834', 'step': 6500, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 6500', 'timestamp': '2025-10-01 04:24:56.745088', 'step': 6500, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:56.816602', 'step': 6500, 'epoch': 1} {'type': 'loss', 'content': 0.21648336946964264, 'timestamp': '2025-10-01 04:24:56.819105', 'step': 6501, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:24:56.879266', 'step': 6501, 'epoch': 1} {'type': 'loss', 'content': 0.20446181297302246, 'timestamp': '2025-10-01 04:24:56.882312', 'step': 6502, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:56.939357', 'step': 6502, 'epoch': 1} {'type': 'loss', 'content': 0.16006125509738922, 'timestamp': '2025-10-01 04:24:56.941894', 'step': 6503, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:57.001936', 'step': 6503, 'epoch': 1} {'type': 'loss', 'content': 0.21580705046653748, 'timestamp': '2025-10-01 04:24:57.030249', 'step': 6504, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:57.088419', 'step': 6504, 'epoch': 1} {'type': 'loss', 'content': 0.135590597987175, 'timestamp': '2025-10-01 04:24:57.090680', 'step': 6505, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:57.146491', 'step': 6505, 'epoch': 1} {'type': 'loss', 'content': 0.08666449785232544, 'timestamp': '2025-10-01 04:24:57.148691', 'step': 6506, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:57.220780', 'step': 6506, 'epoch': 1} {'type': 'loss', 'content': 0.16272011399269104, 'timestamp': '2025-10-01 04:24:57.223384', 'step': 6507, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:57.279362', 'step': 6507, 'epoch': 1} {'type': 'loss', 'content': 0.16412411630153656, 'timestamp': '2025-10-01 04:24:57.285989', 'step': 6508, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:57.341770', 'step': 6508, 'epoch': 1} {'type': 'loss', 'content': 0.18647530674934387, 'timestamp': '2025-10-01 04:24:57.343936', 'step': 6509, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:57.401401', 'step': 6509, 'epoch': 1} {'type': 'loss', 'content': 0.13377556204795837, 'timestamp': '2025-10-01 04:24:57.404642', 'step': 6510, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:57.461503', 'step': 6510, 'epoch': 1} {'type': 'loss', 'content': 0.14322075247764587, 'timestamp': '2025-10-01 04:24:57.463701', 'step': 6511, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:57.520903', 'step': 6511, 'epoch': 1} {'type': 'loss', 'content': 0.1827361285686493, 'timestamp': '2025-10-01 04:24:57.527450', 'step': 6512, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:57.582449', 'step': 6512, 'epoch': 1} {'type': 'loss', 'content': 0.1264362931251526, 'timestamp': '2025-10-01 04:24:57.584686', 'step': 6513, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:57.652504', 'step': 6513, 'epoch': 1} {'type': 'loss', 'content': 0.18088123202323914, 'timestamp': '2025-10-01 04:24:57.654732', 'step': 6514, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:57.712340', 'step': 6514, 'epoch': 1} {'type': 'loss', 'content': 0.13634315133094788, 'timestamp': '2025-10-01 04:24:57.714882', 'step': 6515, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:57.775031', 'step': 6515, 'epoch': 1} {'type': 'loss', 'content': 0.1626749485731125, 'timestamp': '2025-10-01 04:24:57.783156', 'step': 6516, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:57.839199', 'step': 6516, 'epoch': 1} {'type': 'loss', 'content': 0.10931801050901413, 'timestamp': '2025-10-01 04:24:57.855352', 'step': 6517, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:57.914254', 'step': 6517, 'epoch': 1} {'type': 'loss', 'content': 0.14348767697811127, 'timestamp': '2025-10-01 04:24:57.916640', 'step': 6518, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:57.974798', 'step': 6518, 'epoch': 1} {'type': 'loss', 'content': 0.21489208936691284, 'timestamp': '2025-10-01 04:24:57.976894', 'step': 6519, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:58.034415', 'step': 6519, 'epoch': 1} {'type': 'loss', 'content': 0.12887388467788696, 'timestamp': '2025-10-01 04:24:58.041797', 'step': 6520, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:58.099067', 'step': 6520, 'epoch': 1} {'type': 'loss', 'content': 0.12769721448421478, 'timestamp': '2025-10-01 04:24:58.100854', 'step': 6521, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:24:58.179971', 'step': 6521, 'epoch': 1} {'type': 'loss', 'content': 0.17595826089382172, 'timestamp': '2025-10-01 04:24:58.182258', 'step': 6522, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:24:58.239678', 'step': 6522, 'epoch': 1} {'type': 'loss', 'content': 0.12139260023832321, 'timestamp': '2025-10-01 04:24:58.241871', 'step': 6523, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:24:58.310617', 'step': 6523, 'epoch': 1} {'type': 'loss', 'content': 0.1896987110376358, 'timestamp': '2025-10-01 04:24:58.325466', 'step': 6524, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-01 04:25:12.193658', 'step': 6524, 'epoch': 1} {'type': 'pplx', 'content': 12567.509615069055, 'timestamp': '2025-10-01 04:25:12.196378', 'step': 6524, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:12.249295', 'step': 6524, 'epoch': 1} {'type': 'loss', 'content': 0.1249009221792221, 'timestamp': '2025-10-01 04:25:12.254684', 'step': 6525, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:12.308619', 'step': 6525, 'epoch': 1} {'type': 'loss', 'content': 0.10139144212007523, 'timestamp': '2025-10-01 04:25:12.310943', 'step': 6526, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:12.366492', 'step': 6526, 'epoch': 1} {'type': 'loss', 'content': 0.18542011082172394, 'timestamp': '2025-10-01 04:25:12.368911', 'step': 6527, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:12.425288', 'step': 6527, 'epoch': 1} {'type': 'loss', 'content': 0.10475964099168777, 'timestamp': '2025-10-01 04:25:12.431676', 'step': 6528, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:12.496142', 'step': 6528, 'epoch': 1} {'type': 'loss', 'content': 0.16291777789592743, 'timestamp': '2025-10-01 04:25:12.498360', 'step': 6529, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:12.551280', 'step': 6529, 'epoch': 1} {'type': 'loss', 'content': 0.27420327067375183, 'timestamp': '2025-10-01 04:25:12.553949', 'step': 6530, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:12.607455', 'step': 6530, 'epoch': 1} {'type': 'loss', 'content': 0.17499154806137085, 'timestamp': '2025-10-01 04:25:12.609854', 'step': 6531, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:12.664012', 'step': 6531, 'epoch': 1} {'type': 'loss', 'content': 0.1334693878889084, 'timestamp': '2025-10-01 04:25:12.670467', 'step': 6532, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:25:12.724424', 'step': 6532, 'epoch': 1} {'type': 'loss', 'content': 0.11777794361114502, 'timestamp': '2025-10-01 04:25:12.727000', 'step': 6533, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:12.781457', 'step': 6533, 'epoch': 1} {'type': 'loss', 'content': 0.14264319837093353, 'timestamp': '2025-10-01 04:25:12.784150', 'step': 6534, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:12.850564', 'step': 6534, 'epoch': 1} {'type': 'loss', 'content': 0.08736526966094971, 'timestamp': '2025-10-01 04:25:12.853440', 'step': 6535, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:12.917275', 'step': 6535, 'epoch': 1} {'type': 'loss', 'content': 0.16381070017814636, 'timestamp': '2025-10-01 04:25:12.925582', 'step': 6536, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:12.980163', 'step': 6536, 'epoch': 1} {'type': 'loss', 'content': 0.08537597209215164, 'timestamp': '2025-10-01 04:25:12.982800', 'step': 6537, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:13.046108', 'step': 6537, 'epoch': 1} {'type': 'loss', 'content': 0.17882542312145233, 'timestamp': '2025-10-01 04:25:13.048931', 'step': 6538, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:13.104405', 'step': 6538, 'epoch': 1} {'type': 'loss', 'content': 0.18419063091278076, 'timestamp': '2025-10-01 04:25:13.107131', 'step': 6539, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:13.162208', 'step': 6539, 'epoch': 1} {'type': 'loss', 'content': 0.14838096499443054, 'timestamp': '2025-10-01 04:25:13.169114', 'step': 6540, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:13.223758', 'step': 6540, 'epoch': 1} {'type': 'loss', 'content': 0.16150635480880737, 'timestamp': '2025-10-01 04:25:13.226880', 'step': 6541, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:13.281664', 'step': 6541, 'epoch': 1} {'type': 'loss', 'content': 0.12826332449913025, 'timestamp': '2025-10-01 04:25:13.284400', 'step': 6542, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:25:13.351505', 'step': 6542, 'epoch': 1} {'type': 'loss', 'content': 0.18211887776851654, 'timestamp': '2025-10-01 04:25:13.354397', 'step': 6543, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:13.408790', 'step': 6543, 'epoch': 1} {'type': 'loss', 'content': 0.20596228539943695, 'timestamp': '2025-10-01 04:25:13.415208', 'step': 6544, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:13.469446', 'step': 6544, 'epoch': 1} {'type': 'loss', 'content': 0.10414191335439682, 'timestamp': '2025-10-01 04:25:13.472399', 'step': 6545, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:25:13.528900', 'step': 6545, 'epoch': 1} {'type': 'loss', 'content': 0.13391780853271484, 'timestamp': '2025-10-01 04:25:13.531667', 'step': 6546, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:13.584916', 'step': 6546, 'epoch': 1} {'type': 'loss', 'content': 0.059781379997730255, 'timestamp': '2025-10-01 04:25:13.587968', 'step': 6547, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:13.663481', 'step': 6547, 'epoch': 1} {'type': 'loss', 'content': 0.10734357684850693, 'timestamp': '2025-10-01 04:25:13.670093', 'step': 6548, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:13.723570', 'step': 6548, 'epoch': 1} {'type': 'loss', 'content': 0.17429907619953156, 'timestamp': '2025-10-01 04:25:13.726041', 'step': 6549, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:13.782351', 'step': 6549, 'epoch': 1} {'type': 'loss', 'content': 0.19497570395469666, 'timestamp': '2025-10-01 04:25:13.784920', 'step': 6550, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:13.848655', 'step': 6550, 'epoch': 1} {'type': 'loss', 'content': 0.18789073824882507, 'timestamp': '2025-10-01 04:25:13.851434', 'step': 6551, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:13.915835', 'step': 6551, 'epoch': 1} {'type': 'loss', 'content': 0.1741827428340912, 'timestamp': '2025-10-01 04:25:13.922105', 'step': 6552, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:13.975436', 'step': 6552, 'epoch': 1} {'type': 'loss', 'content': 0.18836815655231476, 'timestamp': '2025-10-01 04:25:13.978095', 'step': 6553, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:14.032012', 'step': 6553, 'epoch': 1} {'type': 'loss', 'content': 0.04528915882110596, 'timestamp': '2025-10-01 04:25:14.035065', 'step': 6554, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:14.090406', 'step': 6554, 'epoch': 1} {'type': 'loss', 'content': 0.1508418470621109, 'timestamp': '2025-10-01 04:25:14.093464', 'step': 6555, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:25:14.148124', 'step': 6555, 'epoch': 1} {'type': 'loss', 'content': 0.20992980897426605, 'timestamp': '2025-10-01 04:25:14.154792', 'step': 6556, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:25:14.214446', 'step': 6556, 'epoch': 1} {'type': 'loss', 'content': 0.1187477707862854, 'timestamp': '2025-10-01 04:25:14.217236', 'step': 6557, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:25:14.271617', 'step': 6557, 'epoch': 1} {'type': 'loss', 'content': 0.1417890340089798, 'timestamp': '2025-10-01 04:25:14.275909', 'step': 6558, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:14.340947', 'step': 6558, 'epoch': 1} {'type': 'loss', 'content': 0.22232356667518616, 'timestamp': '2025-10-01 04:25:14.343595', 'step': 6559, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:14.408938', 'step': 6559, 'epoch': 1} {'type': 'loss', 'content': 0.15787231922149658, 'timestamp': '2025-10-01 04:25:14.415403', 'step': 6560, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:14.468646', 'step': 6560, 'epoch': 1} {'type': 'loss', 'content': 0.14514203369617462, 'timestamp': '2025-10-01 04:25:14.471178', 'step': 6561, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:14.541575', 'step': 6561, 'epoch': 1} {'type': 'loss', 'content': 0.17399802803993225, 'timestamp': '2025-10-01 04:25:14.543918', 'step': 6562, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:14.597232', 'step': 6562, 'epoch': 1} {'type': 'loss', 'content': 0.21985480189323425, 'timestamp': '2025-10-01 04:25:14.599563', 'step': 6563, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:14.652313', 'step': 6563, 'epoch': 1} {'type': 'loss', 'content': 0.21486787497997284, 'timestamp': '2025-10-01 04:25:14.658883', 'step': 6564, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:25:14.712998', 'step': 6564, 'epoch': 1} {'type': 'loss', 'content': 0.12107370793819427, 'timestamp': '2025-10-01 04:25:14.715523', 'step': 6565, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:14.769765', 'step': 6565, 'epoch': 1} {'type': 'loss', 'content': 0.14405450224876404, 'timestamp': '2025-10-01 04:25:14.772485', 'step': 6566, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:14.826151', 'step': 6566, 'epoch': 1} {'type': 'loss', 'content': 0.14115560054779053, 'timestamp': '2025-10-01 04:25:14.828497', 'step': 6567, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:14.887656', 'step': 6567, 'epoch': 1} {'type': 'loss', 'content': 0.12722721695899963, 'timestamp': '2025-10-01 04:25:14.894097', 'step': 6568, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:14.946672', 'step': 6568, 'epoch': 1} {'type': 'loss', 'content': 0.14010502398014069, 'timestamp': '2025-10-01 04:25:14.949291', 'step': 6569, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:25:15.003216', 'step': 6569, 'epoch': 1} {'type': 'loss', 'content': 0.27820560336112976, 'timestamp': '2025-10-01 04:25:15.005781', 'step': 6570, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:25:15.058502', 'step': 6570, 'epoch': 1} {'type': 'loss', 'content': 0.14695166051387787, 'timestamp': '2025-10-01 04:25:15.060563', 'step': 6571, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:25:15.113470', 'step': 6571, 'epoch': 1} {'type': 'loss', 'content': 0.16411913931369781, 'timestamp': '2025-10-01 04:25:15.119644', 'step': 6572, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:15.172387', 'step': 6572, 'epoch': 1} {'type': 'loss', 'content': 0.10594233125448227, 'timestamp': '2025-10-01 04:25:15.174935', 'step': 6573, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:15.228567', 'step': 6573, 'epoch': 1} {'type': 'loss', 'content': 0.10854203253984451, 'timestamp': '2025-10-01 04:25:15.231007', 'step': 6574, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:15.284540', 'step': 6574, 'epoch': 1} {'type': 'loss', 'content': 0.1947346031665802, 'timestamp': '2025-10-01 04:25:15.286960', 'step': 6575, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:25:15.340013', 'step': 6575, 'epoch': 1} {'type': 'loss', 'content': 0.11116594076156616, 'timestamp': '2025-10-01 04:25:15.346300', 'step': 6576, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:25:15.398376', 'step': 6576, 'epoch': 1} {'type': 'loss', 'content': 0.11817627400159836, 'timestamp': '2025-10-01 04:25:15.400944', 'step': 6577, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:15.458591', 'step': 6577, 'epoch': 1} {'type': 'loss', 'content': 0.19162560999393463, 'timestamp': '2025-10-01 04:25:15.462137', 'step': 6578, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:15.516016', 'step': 6578, 'epoch': 1} {'type': 'loss', 'content': 0.13825710117816925, 'timestamp': '2025-10-01 04:25:15.518335', 'step': 6579, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:15.572401', 'step': 6579, 'epoch': 1} {'type': 'loss', 'content': 0.20643554627895355, 'timestamp': '2025-10-01 04:25:15.578658', 'step': 6580, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:15.631459', 'step': 6580, 'epoch': 1} {'type': 'loss', 'content': 0.18840155005455017, 'timestamp': '2025-10-01 04:25:15.634009', 'step': 6581, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:15.687688', 'step': 6581, 'epoch': 1} {'type': 'loss', 'content': 0.13909406960010529, 'timestamp': '2025-10-01 04:25:15.690072', 'step': 6582, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:15.743475', 'step': 6582, 'epoch': 1} {'type': 'loss', 'content': 0.11672089248895645, 'timestamp': '2025-10-01 04:25:15.745879', 'step': 6583, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:15.798386', 'step': 6583, 'epoch': 1} {'type': 'loss', 'content': 0.2240617275238037, 'timestamp': '2025-10-01 04:25:15.804301', 'step': 6584, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:25:15.857813', 'step': 6584, 'epoch': 1} {'type': 'loss', 'content': 0.24000594019889832, 'timestamp': '2025-10-01 04:25:15.860770', 'step': 6585, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:15.914457', 'step': 6585, 'epoch': 1} {'type': 'loss', 'content': 0.10797123610973358, 'timestamp': '2025-10-01 04:25:15.916962', 'step': 6586, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:25:15.969798', 'step': 6586, 'epoch': 1} {'type': 'loss', 'content': 0.14240649342536926, 'timestamp': '2025-10-01 04:25:15.981038', 'step': 6587, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:16.034090', 'step': 6587, 'epoch': 1} {'type': 'loss', 'content': 0.16536669433116913, 'timestamp': '2025-10-01 04:25:16.040211', 'step': 6588, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:16.104442', 'step': 6588, 'epoch': 1} {'type': 'loss', 'content': 0.13311584293842316, 'timestamp': '2025-10-01 04:25:16.115141', 'step': 6589, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:25:16.168100', 'step': 6589, 'epoch': 1} {'type': 'loss', 'content': 0.13341957330703735, 'timestamp': '2025-10-01 04:25:16.170248', 'step': 6590, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:25:16.223042', 'step': 6590, 'epoch': 1} {'type': 'loss', 'content': 0.10463216155767441, 'timestamp': '2025-10-01 04:25:16.225475', 'step': 6591, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:25:16.287195', 'step': 6591, 'epoch': 1} {'type': 'loss', 'content': 0.24490498006343842, 'timestamp': '2025-10-01 04:25:16.293012', 'step': 6592, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:16.348174', 'step': 6592, 'epoch': 1} {'type': 'loss', 'content': 0.11125189810991287, 'timestamp': '2025-10-01 04:25:16.350623', 'step': 6593, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:16.404296', 'step': 6593, 'epoch': 1} {'type': 'loss', 'content': 0.18238550424575806, 'timestamp': '2025-10-01 04:25:16.406592', 'step': 6594, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:16.459206', 'step': 6594, 'epoch': 1} {'type': 'loss', 'content': 0.13299556076526642, 'timestamp': '2025-10-01 04:25:16.471075', 'step': 6595, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:25:16.524586', 'step': 6595, 'epoch': 1} {'type': 'loss', 'content': 0.13235574960708618, 'timestamp': '2025-10-01 04:25:16.531028', 'step': 6596, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:16.591658', 'step': 6596, 'epoch': 1} {'type': 'loss', 'content': 0.206233948469162, 'timestamp': '2025-10-01 04:25:16.594014', 'step': 6597, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:16.648112', 'step': 6597, 'epoch': 1} {'type': 'loss', 'content': 0.08094044029712677, 'timestamp': '2025-10-01 04:25:16.650500', 'step': 6598, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:25:16.703713', 'step': 6598, 'epoch': 1} {'type': 'loss', 'content': 0.09904584288597107, 'timestamp': '2025-10-01 04:25:16.707375', 'step': 6599, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:16.760613', 'step': 6599, 'epoch': 1} {'type': 'loss', 'content': 0.09017051011323929, 'timestamp': '2025-10-01 04:25:16.766683', 'step': 6600, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:16.819389', 'step': 6600, 'epoch': 1} {'type': 'loss', 'content': 0.10931180417537689, 'timestamp': '2025-10-01 04:25:16.821708', 'step': 6601, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:16.874785', 'step': 6601, 'epoch': 1} {'type': 'loss', 'content': 0.14203523099422455, 'timestamp': '2025-10-01 04:25:16.876972', 'step': 6602, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:25:16.930148', 'step': 6602, 'epoch': 1} {'type': 'loss', 'content': 0.19935666024684906, 'timestamp': '2025-10-01 04:25:16.932317', 'step': 6603, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:16.985408', 'step': 6603, 'epoch': 1} {'type': 'loss', 'content': 0.28130635619163513, 'timestamp': '2025-10-01 04:25:16.991200', 'step': 6604, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:17.044343', 'step': 6604, 'epoch': 1} {'type': 'loss', 'content': 0.07093434780836105, 'timestamp': '2025-10-01 04:25:17.046804', 'step': 6605, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:17.099431', 'step': 6605, 'epoch': 1} {'type': 'loss', 'content': 0.14468050003051758, 'timestamp': '2025-10-01 04:25:17.111676', 'step': 6606, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:25:17.173857', 'step': 6606, 'epoch': 1} {'type': 'loss', 'content': 0.11081630736589432, 'timestamp': '2025-10-01 04:25:17.176190', 'step': 6607, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:17.229286', 'step': 6607, 'epoch': 1} {'type': 'loss', 'content': 0.1563815176486969, 'timestamp': '2025-10-01 04:25:17.235078', 'step': 6608, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:17.297425', 'step': 6608, 'epoch': 1} {'type': 'loss', 'content': 0.16106922924518585, 'timestamp': '2025-10-01 04:25:17.299916', 'step': 6609, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:17.352155', 'step': 6609, 'epoch': 1} {'type': 'loss', 'content': 0.2277827262878418, 'timestamp': '2025-10-01 04:25:17.354595', 'step': 6610, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:17.407548', 'step': 6610, 'epoch': 1} {'type': 'loss', 'content': 0.16227686405181885, 'timestamp': '2025-10-01 04:25:17.410925', 'step': 6611, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:17.464555', 'step': 6611, 'epoch': 1} {'type': 'loss', 'content': 0.15619628131389618, 'timestamp': '2025-10-01 04:25:17.470496', 'step': 6612, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:17.523059', 'step': 6612, 'epoch': 1} {'type': 'loss', 'content': 0.18313129246234894, 'timestamp': '2025-10-01 04:25:17.525547', 'step': 6613, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:17.579044', 'step': 6613, 'epoch': 1} {'type': 'loss', 'content': 0.15771345794200897, 'timestamp': '2025-10-01 04:25:17.581386', 'step': 6614, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:17.634824', 'step': 6614, 'epoch': 1} {'type': 'loss', 'content': 0.09339402616024017, 'timestamp': '2025-10-01 04:25:17.637285', 'step': 6615, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:17.690166', 'step': 6615, 'epoch': 1} {'type': 'loss', 'content': 0.22203946113586426, 'timestamp': '2025-10-01 04:25:17.696258', 'step': 6616, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:17.750453', 'step': 6616, 'epoch': 1} {'type': 'loss', 'content': 0.1439288705587387, 'timestamp': '2025-10-01 04:25:17.752599', 'step': 6617, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:17.805185', 'step': 6617, 'epoch': 1} {'type': 'loss', 'content': 0.10055656731128693, 'timestamp': '2025-10-01 04:25:17.807791', 'step': 6618, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:17.860617', 'step': 6618, 'epoch': 1} {'type': 'loss', 'content': 0.13017778098583221, 'timestamp': '2025-10-01 04:25:17.863102', 'step': 6619, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:17.922351', 'step': 6619, 'epoch': 1} {'type': 'loss', 'content': 0.12710784375667572, 'timestamp': '2025-10-01 04:25:17.928332', 'step': 6620, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:17.986276', 'step': 6620, 'epoch': 1} {'type': 'loss', 'content': 0.2040969729423523, 'timestamp': '2025-10-01 04:25:17.988378', 'step': 6621, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:18.042341', 'step': 6621, 'epoch': 1} {'type': 'loss', 'content': 0.13371893763542175, 'timestamp': '2025-10-01 04:25:18.044680', 'step': 6622, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:18.107566', 'step': 6622, 'epoch': 1} {'type': 'loss', 'content': 0.22360172867774963, 'timestamp': '2025-10-01 04:25:18.110461', 'step': 6623, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:18.164445', 'step': 6623, 'epoch': 1} {'type': 'loss', 'content': 0.11238426715135574, 'timestamp': '2025-10-01 04:25:18.170521', 'step': 6624, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:18.222718', 'step': 6624, 'epoch': 1} {'type': 'loss', 'content': 0.18181665241718292, 'timestamp': '2025-10-01 04:25:18.225519', 'step': 6625, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:18.283433', 'step': 6625, 'epoch': 1} {'type': 'loss', 'content': 0.16236911714076996, 'timestamp': '2025-10-01 04:25:18.285898', 'step': 6626, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:18.339023', 'step': 6626, 'epoch': 1} {'type': 'loss', 'content': 0.085397869348526, 'timestamp': '2025-10-01 04:25:18.341603', 'step': 6627, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:18.394805', 'step': 6627, 'epoch': 1} {'type': 'loss', 'content': 0.19593192636966705, 'timestamp': '2025-10-01 04:25:18.415355', 'step': 6628, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:18.468173', 'step': 6628, 'epoch': 1} {'type': 'loss', 'content': 0.2143905609846115, 'timestamp': '2025-10-01 04:25:18.470525', 'step': 6629, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:18.525229', 'step': 6629, 'epoch': 1} {'type': 'loss', 'content': 0.13175736367702484, 'timestamp': '2025-10-01 04:25:18.527524', 'step': 6630, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:18.580671', 'step': 6630, 'epoch': 1} {'type': 'loss', 'content': 0.24355483055114746, 'timestamp': '2025-10-01 04:25:18.583055', 'step': 6631, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:18.636436', 'step': 6631, 'epoch': 1} {'type': 'loss', 'content': 0.11692456156015396, 'timestamp': '2025-10-01 04:25:18.642470', 'step': 6632, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:18.694396', 'step': 6632, 'epoch': 1} {'type': 'loss', 'content': 0.13746529817581177, 'timestamp': '2025-10-01 04:25:18.697027', 'step': 6633, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:18.750284', 'step': 6633, 'epoch': 1} {'type': 'loss', 'content': 0.16324062645435333, 'timestamp': '2025-10-01 04:25:18.752546', 'step': 6634, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:18.805371', 'step': 6634, 'epoch': 1} {'type': 'loss', 'content': 0.25018954277038574, 'timestamp': '2025-10-01 04:25:18.807758', 'step': 6635, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:18.860538', 'step': 6635, 'epoch': 1} {'type': 'loss', 'content': 0.1737774908542633, 'timestamp': '2025-10-01 04:25:18.866444', 'step': 6636, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:18.918134', 'step': 6636, 'epoch': 1} {'type': 'loss', 'content': 0.06820111721754074, 'timestamp': '2025-10-01 04:25:18.920464', 'step': 6637, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:18.973631', 'step': 6637, 'epoch': 1} {'type': 'loss', 'content': 0.16986927390098572, 'timestamp': '2025-10-01 04:25:18.975983', 'step': 6638, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:19.028767', 'step': 6638, 'epoch': 1} {'type': 'loss', 'content': 0.16090592741966248, 'timestamp': '2025-10-01 04:25:19.031027', 'step': 6639, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:19.083717', 'step': 6639, 'epoch': 1} {'type': 'loss', 'content': 0.14496619999408722, 'timestamp': '2025-10-01 04:25:19.089782', 'step': 6640, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:19.142350', 'step': 6640, 'epoch': 1} {'type': 'loss', 'content': 0.25506672263145447, 'timestamp': '2025-10-01 04:25:19.157163', 'step': 6641, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:19.210371', 'step': 6641, 'epoch': 1} {'type': 'loss', 'content': 0.14689850807189941, 'timestamp': '2025-10-01 04:25:19.212682', 'step': 6642, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:25:19.271603', 'step': 6642, 'epoch': 1} {'type': 'loss', 'content': 0.2505151629447937, 'timestamp': '2025-10-01 04:25:19.274368', 'step': 6643, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:19.327655', 'step': 6643, 'epoch': 1} {'type': 'loss', 'content': 0.16759257018566132, 'timestamp': '2025-10-01 04:25:19.333698', 'step': 6644, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:19.385713', 'step': 6644, 'epoch': 1} {'type': 'loss', 'content': 0.087417371571064, 'timestamp': '2025-10-01 04:25:19.388209', 'step': 6645, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:19.441169', 'step': 6645, 'epoch': 1} {'type': 'loss', 'content': 0.135017529129982, 'timestamp': '2025-10-01 04:25:19.443379', 'step': 6646, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:25:19.496131', 'step': 6646, 'epoch': 1} {'type': 'loss', 'content': 0.08513517677783966, 'timestamp': '2025-10-01 04:25:19.498481', 'step': 6647, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:19.553472', 'step': 6647, 'epoch': 1} {'type': 'loss', 'content': 0.0876665711402893, 'timestamp': '2025-10-01 04:25:19.559663', 'step': 6648, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:25:19.625694', 'step': 6648, 'epoch': 1} {'type': 'loss', 'content': 0.22503973543643951, 'timestamp': '2025-10-01 04:25:19.627989', 'step': 6649, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:19.681694', 'step': 6649, 'epoch': 1} {'type': 'loss', 'content': 0.22313907742500305, 'timestamp': '2025-10-01 04:25:19.684130', 'step': 6650, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:19.750073', 'step': 6650, 'epoch': 1} {'type': 'loss', 'content': 0.09177657961845398, 'timestamp': '2025-10-01 04:25:19.760157', 'step': 6651, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:25:19.813228', 'step': 6651, 'epoch': 1} {'type': 'loss', 'content': 0.2031959444284439, 'timestamp': '2025-10-01 04:25:19.819186', 'step': 6652, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:19.871787', 'step': 6652, 'epoch': 1} {'type': 'loss', 'content': 0.22925211489200592, 'timestamp': '2025-10-01 04:25:19.874178', 'step': 6653, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:19.926761', 'step': 6653, 'epoch': 1} {'type': 'loss', 'content': 0.11497489362955093, 'timestamp': '2025-10-01 04:25:19.929172', 'step': 6654, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:19.982490', 'step': 6654, 'epoch': 1} {'type': 'loss', 'content': 0.20669683814048767, 'timestamp': '2025-10-01 04:25:19.992940', 'step': 6655, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:20.059726', 'step': 6655, 'epoch': 1} {'type': 'loss', 'content': 0.18120871484279633, 'timestamp': '2025-10-01 04:25:20.065658', 'step': 6656, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:20.118179', 'step': 6656, 'epoch': 1} {'type': 'loss', 'content': 0.14845117926597595, 'timestamp': '2025-10-01 04:25:20.120401', 'step': 6657, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:25:20.173329', 'step': 6657, 'epoch': 1} {'type': 'loss', 'content': 0.21573631465435028, 'timestamp': '2025-10-01 04:25:20.175649', 'step': 6658, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:20.229310', 'step': 6658, 'epoch': 1} {'type': 'loss', 'content': 0.10997273772954941, 'timestamp': '2025-10-01 04:25:20.231561', 'step': 6659, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:20.284403', 'step': 6659, 'epoch': 1} {'type': 'loss', 'content': 0.1803687959909439, 'timestamp': '2025-10-01 04:25:20.290834', 'step': 6660, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:20.343644', 'step': 6660, 'epoch': 1} {'type': 'loss', 'content': 0.0618419386446476, 'timestamp': '2025-10-01 04:25:20.345967', 'step': 6661, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:25:20.399728', 'step': 6661, 'epoch': 1} {'type': 'loss', 'content': 0.11568693816661835, 'timestamp': '2025-10-01 04:25:20.401983', 'step': 6662, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:20.470235', 'step': 6662, 'epoch': 1} {'type': 'loss', 'content': 0.1285158097743988, 'timestamp': '2025-10-01 04:25:20.472551', 'step': 6663, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:20.525719', 'step': 6663, 'epoch': 1} {'type': 'loss', 'content': 0.13758264482021332, 'timestamp': '2025-10-01 04:25:20.531395', 'step': 6664, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:20.584107', 'step': 6664, 'epoch': 1} {'type': 'loss', 'content': 0.13376060128211975, 'timestamp': '2025-10-01 04:25:20.586586', 'step': 6665, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:20.640088', 'step': 6665, 'epoch': 1} {'type': 'loss', 'content': 0.10279106348752975, 'timestamp': '2025-10-01 04:25:20.643013', 'step': 6666, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:20.696391', 'step': 6666, 'epoch': 1} {'type': 'loss', 'content': 0.32477086782455444, 'timestamp': '2025-10-01 04:25:20.698941', 'step': 6667, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:20.751759', 'step': 6667, 'epoch': 1} {'type': 'loss', 'content': 0.1420062929391861, 'timestamp': '2025-10-01 04:25:20.758552', 'step': 6668, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:20.818465', 'step': 6668, 'epoch': 1} {'type': 'loss', 'content': 0.17305544018745422, 'timestamp': '2025-10-01 04:25:20.820778', 'step': 6669, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:20.873400', 'step': 6669, 'epoch': 1} {'type': 'loss', 'content': 0.09643819183111191, 'timestamp': '2025-10-01 04:25:20.876489', 'step': 6670, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:20.930177', 'step': 6670, 'epoch': 1} {'type': 'loss', 'content': 0.04662863537669182, 'timestamp': '2025-10-01 04:25:20.933154', 'step': 6671, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:20.987125', 'step': 6671, 'epoch': 1} {'type': 'loss', 'content': 0.1332251876592636, 'timestamp': '2025-10-01 04:25:20.994706', 'step': 6672, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:21.048745', 'step': 6672, 'epoch': 1} {'type': 'loss', 'content': 0.2214997261762619, 'timestamp': '2025-10-01 04:25:21.051452', 'step': 6673, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:21.105379', 'step': 6673, 'epoch': 1} {'type': 'loss', 'content': 0.1881057173013687, 'timestamp': '2025-10-01 04:25:21.107842', 'step': 6674, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:21.162778', 'step': 6674, 'epoch': 1} {'type': 'loss', 'content': 0.2182495892047882, 'timestamp': '2025-10-01 04:25:21.179569', 'step': 6675, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:21.234154', 'step': 6675, 'epoch': 1} {'type': 'loss', 'content': 0.08578502386808395, 'timestamp': '2025-10-01 04:25:21.240699', 'step': 6676, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:25:21.295710', 'step': 6676, 'epoch': 1} {'type': 'loss', 'content': 0.12789027392864227, 'timestamp': '2025-10-01 04:25:21.298181', 'step': 6677, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:21.352445', 'step': 6677, 'epoch': 1} {'type': 'loss', 'content': 0.1453855037689209, 'timestamp': '2025-10-01 04:25:21.355389', 'step': 6678, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:21.409117', 'step': 6678, 'epoch': 1} {'type': 'loss', 'content': 0.19369910657405853, 'timestamp': '2025-10-01 04:25:21.411833', 'step': 6679, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:25:21.465786', 'step': 6679, 'epoch': 1} {'type': 'loss', 'content': 0.18819713592529297, 'timestamp': '2025-10-01 04:25:21.472177', 'step': 6680, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:21.526143', 'step': 6680, 'epoch': 1} {'type': 'loss', 'content': 0.12611737847328186, 'timestamp': '2025-10-01 04:25:21.528563', 'step': 6681, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:21.582786', 'step': 6681, 'epoch': 1} {'type': 'loss', 'content': 0.1609942764043808, 'timestamp': '2025-10-01 04:25:21.585607', 'step': 6682, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:21.642816', 'step': 6682, 'epoch': 1} {'type': 'loss', 'content': 0.21977972984313965, 'timestamp': '2025-10-01 04:25:21.645895', 'step': 6683, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:21.699327', 'step': 6683, 'epoch': 1} {'type': 'loss', 'content': 0.1647215485572815, 'timestamp': '2025-10-01 04:25:21.705874', 'step': 6684, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:25:21.759366', 'step': 6684, 'epoch': 1} {'type': 'loss', 'content': 0.18156400322914124, 'timestamp': '2025-10-01 04:25:21.762296', 'step': 6685, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:21.816694', 'step': 6685, 'epoch': 1} {'type': 'loss', 'content': 0.10557462275028229, 'timestamp': '2025-10-01 04:25:21.819507', 'step': 6686, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:21.873288', 'step': 6686, 'epoch': 1} {'type': 'loss', 'content': 0.13192234933376312, 'timestamp': '2025-10-01 04:25:21.875878', 'step': 6687, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:21.929490', 'step': 6687, 'epoch': 1} {'type': 'loss', 'content': 0.171589657664299, 'timestamp': '2025-10-01 04:25:21.935348', 'step': 6688, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:21.988523', 'step': 6688, 'epoch': 1} {'type': 'loss', 'content': 0.23748779296875, 'timestamp': '2025-10-01 04:25:21.991060', 'step': 6689, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:25:22.045234', 'step': 6689, 'epoch': 1} {'type': 'loss', 'content': 0.2134408950805664, 'timestamp': '2025-10-01 04:25:22.048249', 'step': 6690, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:22.102483', 'step': 6690, 'epoch': 1} {'type': 'loss', 'content': 0.1926277130842209, 'timestamp': '2025-10-01 04:25:22.105384', 'step': 6691, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:22.160488', 'step': 6691, 'epoch': 1} {'type': 'loss', 'content': 0.14819329977035522, 'timestamp': '2025-10-01 04:25:22.166723', 'step': 6692, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:22.219781', 'step': 6692, 'epoch': 1} {'type': 'loss', 'content': 0.18856588006019592, 'timestamp': '2025-10-01 04:25:22.223653', 'step': 6693, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:22.278667', 'step': 6693, 'epoch': 1} {'type': 'loss', 'content': 0.13664358854293823, 'timestamp': '2025-10-01 04:25:22.281033', 'step': 6694, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:22.334717', 'step': 6694, 'epoch': 1} {'type': 'loss', 'content': 0.13229355216026306, 'timestamp': '2025-10-01 04:25:22.337071', 'step': 6695, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:25:22.390439', 'step': 6695, 'epoch': 1} {'type': 'loss', 'content': 0.15068165957927704, 'timestamp': '2025-10-01 04:25:22.396372', 'step': 6696, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:22.458283', 'step': 6696, 'epoch': 1} {'type': 'loss', 'content': 0.17477712035179138, 'timestamp': '2025-10-01 04:25:22.461219', 'step': 6697, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:22.514656', 'step': 6697, 'epoch': 1} {'type': 'loss', 'content': 0.21312658488750458, 'timestamp': '2025-10-01 04:25:22.517166', 'step': 6698, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:25:22.571529', 'step': 6698, 'epoch': 1} {'type': 'loss', 'content': 0.19337192177772522, 'timestamp': '2025-10-01 04:25:22.574024', 'step': 6699, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:22.626870', 'step': 6699, 'epoch': 1} {'type': 'loss', 'content': 0.11535780876874924, 'timestamp': '2025-10-01 04:25:22.632830', 'step': 6700, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:22.709450', 'step': 6700, 'epoch': 1} {'type': 'loss', 'content': 0.182997927069664, 'timestamp': '2025-10-01 04:25:22.711723', 'step': 6701, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:22.764883', 'step': 6701, 'epoch': 1} {'type': 'loss', 'content': 0.22927583754062653, 'timestamp': '2025-10-01 04:25:22.767264', 'step': 6702, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:25:22.820206', 'step': 6702, 'epoch': 1} {'type': 'loss', 'content': 0.16704364120960236, 'timestamp': '2025-10-01 04:25:22.822562', 'step': 6703, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:22.876387', 'step': 6703, 'epoch': 1} {'type': 'loss', 'content': 0.16812348365783691, 'timestamp': '2025-10-01 04:25:22.882494', 'step': 6704, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:22.934957', 'step': 6704, 'epoch': 1} {'type': 'loss', 'content': 0.1774630844593048, 'timestamp': '2025-10-01 04:25:22.937770', 'step': 6705, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:25:22.991576', 'step': 6705, 'epoch': 1} {'type': 'loss', 'content': 0.1655624955892563, 'timestamp': '2025-10-01 04:25:22.994070', 'step': 6706, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:23.047886', 'step': 6706, 'epoch': 1} {'type': 'loss', 'content': 0.15424175560474396, 'timestamp': '2025-10-01 04:25:23.050652', 'step': 6707, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:23.103551', 'step': 6707, 'epoch': 1} {'type': 'loss', 'content': 0.1613803505897522, 'timestamp': '2025-10-01 04:25:23.109698', 'step': 6708, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:23.161903', 'step': 6708, 'epoch': 1} {'type': 'loss', 'content': 0.09974133223295212, 'timestamp': '2025-10-01 04:25:23.164237', 'step': 6709, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:23.217062', 'step': 6709, 'epoch': 1} {'type': 'loss', 'content': 0.21898765861988068, 'timestamp': '2025-10-01 04:25:23.219354', 'step': 6710, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:25:23.272794', 'step': 6710, 'epoch': 1} {'type': 'loss', 'content': 0.2233736217021942, 'timestamp': '2025-10-01 04:25:23.289466', 'step': 6711, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:23.342718', 'step': 6711, 'epoch': 1} {'type': 'loss', 'content': 0.15732313692569733, 'timestamp': '2025-10-01 04:25:23.348845', 'step': 6712, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:23.401428', 'step': 6712, 'epoch': 1} {'type': 'loss', 'content': 0.13895674049854279, 'timestamp': '2025-10-01 04:25:23.403934', 'step': 6713, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:23.458623', 'step': 6713, 'epoch': 1} {'type': 'loss', 'content': 0.17191095650196075, 'timestamp': '2025-10-01 04:25:23.460891', 'step': 6714, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:25:23.515287', 'step': 6714, 'epoch': 1} {'type': 'loss', 'content': 0.09166035056114197, 'timestamp': '2025-10-01 04:25:23.517603', 'step': 6715, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:23.571258', 'step': 6715, 'epoch': 1} {'type': 'loss', 'content': 0.16734205186367035, 'timestamp': '2025-10-01 04:25:23.577946', 'step': 6716, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-01 04:25:23.632453', 'step': 6716, 'epoch': 1} {'type': 'loss', 'content': 0.19257594645023346, 'timestamp': '2025-10-01 04:25:23.645213', 'step': 6717, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:23.698219', 'step': 6717, 'epoch': 1} {'type': 'loss', 'content': 0.1513943374156952, 'timestamp': '2025-10-01 04:25:23.700671', 'step': 6718, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:25:23.753758', 'step': 6718, 'epoch': 1} {'type': 'loss', 'content': 0.17995992302894592, 'timestamp': '2025-10-01 04:25:23.756146', 'step': 6719, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:23.809678', 'step': 6719, 'epoch': 1} {'type': 'loss', 'content': 0.1498771458864212, 'timestamp': '2025-10-01 04:25:23.815436', 'step': 6720, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:23.867537', 'step': 6720, 'epoch': 1} {'type': 'loss', 'content': 0.2180739790201187, 'timestamp': '2025-10-01 04:25:23.880076', 'step': 6721, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:23.933205', 'step': 6721, 'epoch': 1} {'type': 'loss', 'content': 0.2786679267883301, 'timestamp': '2025-10-01 04:25:23.935590', 'step': 6722, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:23.988843', 'step': 6722, 'epoch': 1} {'type': 'loss', 'content': 0.1994396597146988, 'timestamp': '2025-10-01 04:25:23.991330', 'step': 6723, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:24.044333', 'step': 6723, 'epoch': 1} {'type': 'loss', 'content': 0.11111622303724289, 'timestamp': '2025-10-01 04:25:24.050234', 'step': 6724, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:24.112648', 'step': 6724, 'epoch': 1} {'type': 'loss', 'content': 0.12163747102022171, 'timestamp': '2025-10-01 04:25:24.114956', 'step': 6725, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:24.168063', 'step': 6725, 'epoch': 1} {'type': 'loss', 'content': 0.11259359866380692, 'timestamp': '2025-10-01 04:25:24.170595', 'step': 6726, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:24.223845', 'step': 6726, 'epoch': 1} {'type': 'loss', 'content': 0.12093155086040497, 'timestamp': '2025-10-01 04:25:24.226337', 'step': 6727, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:24.279353', 'step': 6727, 'epoch': 1} {'type': 'loss', 'content': 0.09436946362257004, 'timestamp': '2025-10-01 04:25:24.285276', 'step': 6728, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:24.337909', 'step': 6728, 'epoch': 1} {'type': 'loss', 'content': 0.1793186068534851, 'timestamp': '2025-10-01 04:25:24.340467', 'step': 6729, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:24.394742', 'step': 6729, 'epoch': 1} {'type': 'loss', 'content': 0.19983923435211182, 'timestamp': '2025-10-01 04:25:24.397159', 'step': 6730, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:24.450869', 'step': 6730, 'epoch': 1} {'type': 'loss', 'content': 0.20195066928863525, 'timestamp': '2025-10-01 04:25:24.453291', 'step': 6731, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:24.507153', 'step': 6731, 'epoch': 1} {'type': 'loss', 'content': 0.2756726145744324, 'timestamp': '2025-10-01 04:25:24.513281', 'step': 6732, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:24.570393', 'step': 6732, 'epoch': 1} {'type': 'loss', 'content': 0.13068263232707977, 'timestamp': '2025-10-01 04:25:24.572385', 'step': 6733, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:24.625827', 'step': 6733, 'epoch': 1} {'type': 'loss', 'content': 0.18716014921665192, 'timestamp': '2025-10-01 04:25:24.628848', 'step': 6734, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:24.682358', 'step': 6734, 'epoch': 1} {'type': 'loss', 'content': 0.0722484439611435, 'timestamp': '2025-10-01 04:25:24.684698', 'step': 6735, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:24.738363', 'step': 6735, 'epoch': 1} {'type': 'loss', 'content': 0.17157165706157684, 'timestamp': '2025-10-01 04:25:24.744525', 'step': 6736, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:24.797239', 'step': 6736, 'epoch': 1} {'type': 'loss', 'content': 0.16468602418899536, 'timestamp': '2025-10-01 04:25:24.799493', 'step': 6737, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:24.852742', 'step': 6737, 'epoch': 1} {'type': 'loss', 'content': 0.14332158863544464, 'timestamp': '2025-10-01 04:25:24.855156', 'step': 6738, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:24.909203', 'step': 6738, 'epoch': 1} {'type': 'loss', 'content': 0.16917665302753448, 'timestamp': '2025-10-01 04:25:24.911704', 'step': 6739, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:24.976262', 'step': 6739, 'epoch': 1} {'type': 'loss', 'content': 0.14607204496860504, 'timestamp': '2025-10-01 04:25:24.982600', 'step': 6740, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:25.035996', 'step': 6740, 'epoch': 1} {'type': 'loss', 'content': 0.22014430165290833, 'timestamp': '2025-10-01 04:25:25.038484', 'step': 6741, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:25:25.092086', 'step': 6741, 'epoch': 1} {'type': 'loss', 'content': 0.15668068826198578, 'timestamp': '2025-10-01 04:25:25.094385', 'step': 6742, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:25.148655', 'step': 6742, 'epoch': 1} {'type': 'loss', 'content': 0.14364437758922577, 'timestamp': '2025-10-01 04:25:25.151078', 'step': 6743, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:25.215328', 'step': 6743, 'epoch': 1} {'type': 'loss', 'content': 0.12510952353477478, 'timestamp': '2025-10-01 04:25:25.221506', 'step': 6744, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:25.274572', 'step': 6744, 'epoch': 1} {'type': 'loss', 'content': 0.12269136309623718, 'timestamp': '2025-10-01 04:25:25.277063', 'step': 6745, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:25.330502', 'step': 6745, 'epoch': 1} {'type': 'loss', 'content': 0.13375870883464813, 'timestamp': '2025-10-01 04:25:25.332887', 'step': 6746, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:25.386691', 'step': 6746, 'epoch': 1} {'type': 'loss', 'content': 0.14706231653690338, 'timestamp': '2025-10-01 04:25:25.389220', 'step': 6747, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:25.443399', 'step': 6747, 'epoch': 1} {'type': 'loss', 'content': 0.18405912816524506, 'timestamp': '2025-10-01 04:25:25.449711', 'step': 6748, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:25.502581', 'step': 6748, 'epoch': 1} {'type': 'loss', 'content': 0.14231421053409576, 'timestamp': '2025-10-01 04:25:25.517680', 'step': 6749, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:25.588689', 'step': 6749, 'epoch': 1} {'type': 'loss', 'content': 0.24135926365852356, 'timestamp': '2025-10-01 04:25:25.598948', 'step': 6750, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:25.670899', 'step': 6750, 'epoch': 1} {'type': 'loss', 'content': 0.10339199751615524, 'timestamp': '2025-10-01 04:25:25.686142', 'step': 6751, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:25.754469', 'step': 6751, 'epoch': 1} {'type': 'loss', 'content': 0.18722032010555267, 'timestamp': '2025-10-01 04:25:25.761600', 'step': 6752, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:25.832915', 'step': 6752, 'epoch': 1} {'type': 'loss', 'content': 0.17414136230945587, 'timestamp': '2025-10-01 04:25:25.848126', 'step': 6753, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:25.912748', 'step': 6753, 'epoch': 1} {'type': 'loss', 'content': 0.13513252139091492, 'timestamp': '2025-10-01 04:25:25.918698', 'step': 6754, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:25.991390', 'step': 6754, 'epoch': 1} {'type': 'loss', 'content': 0.15183258056640625, 'timestamp': '2025-10-01 04:25:25.994266', 'step': 6755, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:25:26.049044', 'step': 6755, 'epoch': 1} {'type': 'loss', 'content': 0.15211978554725647, 'timestamp': '2025-10-01 04:25:26.058304', 'step': 6756, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:26.118317', 'step': 6756, 'epoch': 1} {'type': 'loss', 'content': 0.11673261225223541, 'timestamp': '2025-10-01 04:25:26.122744', 'step': 6757, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:26.179472', 'step': 6757, 'epoch': 1} {'type': 'loss', 'content': 0.06497528403997421, 'timestamp': '2025-10-01 04:25:26.189465', 'step': 6758, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:26.251824', 'step': 6758, 'epoch': 1} {'type': 'loss', 'content': 0.1450057476758957, 'timestamp': '2025-10-01 04:25:26.259035', 'step': 6759, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:26.314914', 'step': 6759, 'epoch': 1} {'type': 'loss', 'content': 0.11852043122053146, 'timestamp': '2025-10-01 04:25:26.339466', 'step': 6760, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:26.397480', 'step': 6760, 'epoch': 1} {'type': 'loss', 'content': 0.1382008194923401, 'timestamp': '2025-10-01 04:25:26.400040', 'step': 6761, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:26.456835', 'step': 6761, 'epoch': 1} {'type': 'loss', 'content': 0.17171111702919006, 'timestamp': '2025-10-01 04:25:26.464568', 'step': 6762, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:25:26.539737', 'step': 6762, 'epoch': 1} {'type': 'loss', 'content': 0.12722653150558472, 'timestamp': '2025-10-01 04:25:26.550922', 'step': 6763, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:26.617323', 'step': 6763, 'epoch': 1} {'type': 'loss', 'content': 0.10327773541212082, 'timestamp': '2025-10-01 04:25:26.624264', 'step': 6764, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:26.689495', 'step': 6764, 'epoch': 1} {'type': 'loss', 'content': 0.14051999151706696, 'timestamp': '2025-10-01 04:25:26.692095', 'step': 6765, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:26.745415', 'step': 6765, 'epoch': 1} {'type': 'loss', 'content': 0.24780583381652832, 'timestamp': '2025-10-01 04:25:26.747787', 'step': 6766, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:26.802432', 'step': 6766, 'epoch': 1} {'type': 'loss', 'content': 0.17281527817249298, 'timestamp': '2025-10-01 04:25:26.804609', 'step': 6767, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:26.857922', 'step': 6767, 'epoch': 1} {'type': 'loss', 'content': 0.20367613434791565, 'timestamp': '2025-10-01 04:25:26.865042', 'step': 6768, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:26.917422', 'step': 6768, 'epoch': 1} {'type': 'loss', 'content': 0.10406095534563065, 'timestamp': '2025-10-01 04:25:26.919796', 'step': 6769, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:26.972865', 'step': 6769, 'epoch': 1} {'type': 'loss', 'content': 0.22373513877391815, 'timestamp': '2025-10-01 04:25:26.975787', 'step': 6770, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:25:27.029015', 'step': 6770, 'epoch': 1} {'type': 'loss', 'content': 0.09255174547433853, 'timestamp': '2025-10-01 04:25:27.032099', 'step': 6771, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:27.086243', 'step': 6771, 'epoch': 1} {'type': 'loss', 'content': 0.20856864750385284, 'timestamp': '2025-10-01 04:25:27.094141', 'step': 6772, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:25:27.149891', 'step': 6772, 'epoch': 1} {'type': 'loss', 'content': 0.09750093519687653, 'timestamp': '2025-10-01 04:25:27.152320', 'step': 6773, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:27.205445', 'step': 6773, 'epoch': 1} {'type': 'loss', 'content': 0.19677454233169556, 'timestamp': '2025-10-01 04:25:27.207985', 'step': 6774, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:27.261007', 'step': 6774, 'epoch': 1} {'type': 'loss', 'content': 0.061920974403619766, 'timestamp': '2025-10-01 04:25:27.263523', 'step': 6775, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:27.333635', 'step': 6775, 'epoch': 1} {'type': 'loss', 'content': 0.12032376974821091, 'timestamp': '2025-10-01 04:25:27.352419', 'step': 6776, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:27.405690', 'step': 6776, 'epoch': 1} {'type': 'loss', 'content': 0.11446677893400192, 'timestamp': '2025-10-01 04:25:27.408422', 'step': 6777, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:27.461591', 'step': 6777, 'epoch': 1} {'type': 'loss', 'content': 0.19862805306911469, 'timestamp': '2025-10-01 04:25:27.464199', 'step': 6778, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:27.518485', 'step': 6778, 'epoch': 1} {'type': 'loss', 'content': 0.17056787014007568, 'timestamp': '2025-10-01 04:25:27.520730', 'step': 6779, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:27.574947', 'step': 6779, 'epoch': 1} {'type': 'loss', 'content': 0.20416103303432465, 'timestamp': '2025-10-01 04:25:27.581360', 'step': 6780, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:27.633995', 'step': 6780, 'epoch': 1} {'type': 'loss', 'content': 0.09462299942970276, 'timestamp': '2025-10-01 04:25:27.636522', 'step': 6781, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:27.690214', 'step': 6781, 'epoch': 1} {'type': 'loss', 'content': 0.119526706635952, 'timestamp': '2025-10-01 04:25:27.692888', 'step': 6782, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:27.746356', 'step': 6782, 'epoch': 1} {'type': 'loss', 'content': 0.1987817883491516, 'timestamp': '2025-10-01 04:25:27.748778', 'step': 6783, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:27.812520', 'step': 6783, 'epoch': 1} {'type': 'loss', 'content': 0.14393919706344604, 'timestamp': '2025-10-01 04:25:27.830011', 'step': 6784, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:27.886790', 'step': 6784, 'epoch': 1} {'type': 'loss', 'content': 0.15690062940120697, 'timestamp': '2025-10-01 04:25:27.889378', 'step': 6785, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:27.943662', 'step': 6785, 'epoch': 1} {'type': 'loss', 'content': 0.1079665943980217, 'timestamp': '2025-10-01 04:25:27.946096', 'step': 6786, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:27.999468', 'step': 6786, 'epoch': 1} {'type': 'loss', 'content': 0.2736532986164093, 'timestamp': '2025-10-01 04:25:28.002016', 'step': 6787, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:28.056504', 'step': 6787, 'epoch': 1} {'type': 'loss', 'content': 0.11208368092775345, 'timestamp': '2025-10-01 04:25:28.062910', 'step': 6788, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:28.116096', 'step': 6788, 'epoch': 1} {'type': 'loss', 'content': 0.13735653460025787, 'timestamp': '2025-10-01 04:25:28.118557', 'step': 6789, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:28.172805', 'step': 6789, 'epoch': 1} {'type': 'loss', 'content': 0.1984889954328537, 'timestamp': '2025-10-01 04:25:28.175075', 'step': 6790, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:28.227991', 'step': 6790, 'epoch': 1} {'type': 'loss', 'content': 0.13366341590881348, 'timestamp': '2025-10-01 04:25:28.230400', 'step': 6791, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:28.283650', 'step': 6791, 'epoch': 1} {'type': 'loss', 'content': 0.1599092036485672, 'timestamp': '2025-10-01 04:25:28.290030', 'step': 6792, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:25:28.342996', 'step': 6792, 'epoch': 1} {'type': 'loss', 'content': 0.11235272139310837, 'timestamp': '2025-10-01 04:25:28.345271', 'step': 6793, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:25:28.398872', 'step': 6793, 'epoch': 1} {'type': 'loss', 'content': 0.15729743242263794, 'timestamp': '2025-10-01 04:25:28.401249', 'step': 6794, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:28.455005', 'step': 6794, 'epoch': 1} {'type': 'loss', 'content': 0.1014782190322876, 'timestamp': '2025-10-01 04:25:28.457798', 'step': 6795, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:28.512813', 'step': 6795, 'epoch': 1} {'type': 'loss', 'content': 0.1353645622730255, 'timestamp': '2025-10-01 04:25:28.519517', 'step': 6796, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:28.574383', 'step': 6796, 'epoch': 1} {'type': 'loss', 'content': 0.21403026580810547, 'timestamp': '2025-10-01 04:25:28.577020', 'step': 6797, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:28.633210', 'step': 6797, 'epoch': 1} {'type': 'loss', 'content': 0.19284182786941528, 'timestamp': '2025-10-01 04:25:28.636998', 'step': 6798, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:28.693331', 'step': 6798, 'epoch': 1} {'type': 'loss', 'content': 0.11742750555276871, 'timestamp': '2025-10-01 04:25:28.696045', 'step': 6799, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:28.750952', 'step': 6799, 'epoch': 1} {'type': 'loss', 'content': 0.24358047544956207, 'timestamp': '2025-10-01 04:25:28.757903', 'step': 6800, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:28.822237', 'step': 6800, 'epoch': 1} {'type': 'loss', 'content': 0.1323658674955368, 'timestamp': '2025-10-01 04:25:28.824962', 'step': 6801, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:28.878958', 'step': 6801, 'epoch': 1} {'type': 'loss', 'content': 0.24258752167224884, 'timestamp': '2025-10-01 04:25:28.881453', 'step': 6802, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:28.935892', 'step': 6802, 'epoch': 1} {'type': 'loss', 'content': 0.2241218090057373, 'timestamp': '2025-10-01 04:25:28.938483', 'step': 6803, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:28.992206', 'step': 6803, 'epoch': 1} {'type': 'loss', 'content': 0.12174010276794434, 'timestamp': '2025-10-01 04:25:28.998880', 'step': 6804, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:29.056469', 'step': 6804, 'epoch': 1} {'type': 'loss', 'content': 0.21689602732658386, 'timestamp': '2025-10-01 04:25:29.059347', 'step': 6805, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:29.113397', 'step': 6805, 'epoch': 1} {'type': 'loss', 'content': 0.1282835304737091, 'timestamp': '2025-10-01 04:25:29.116034', 'step': 6806, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:29.169909', 'step': 6806, 'epoch': 1} {'type': 'loss', 'content': 0.0895274206995964, 'timestamp': '2025-10-01 04:25:29.172429', 'step': 6807, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:29.225813', 'step': 6807, 'epoch': 1} {'type': 'loss', 'content': 0.15701226890087128, 'timestamp': '2025-10-01 04:25:29.233076', 'step': 6808, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:29.285656', 'step': 6808, 'epoch': 1} {'type': 'loss', 'content': 0.13182926177978516, 'timestamp': '2025-10-01 04:25:29.288472', 'step': 6809, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:29.342648', 'step': 6809, 'epoch': 1} {'type': 'loss', 'content': 0.19481465220451355, 'timestamp': '2025-10-01 04:25:29.345536', 'step': 6810, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:29.401158', 'step': 6810, 'epoch': 1} {'type': 'loss', 'content': 0.1139756441116333, 'timestamp': '2025-10-01 04:25:29.404028', 'step': 6811, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:29.467790', 'step': 6811, 'epoch': 1} {'type': 'loss', 'content': 0.20535066723823547, 'timestamp': '2025-10-01 04:25:29.474421', 'step': 6812, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:29.528345', 'step': 6812, 'epoch': 1} {'type': 'loss', 'content': 0.09236299246549606, 'timestamp': '2025-10-01 04:25:29.531205', 'step': 6813, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:29.585571', 'step': 6813, 'epoch': 1} {'type': 'loss', 'content': 0.09853110462427139, 'timestamp': '2025-10-01 04:25:29.588544', 'step': 6814, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:29.643831', 'step': 6814, 'epoch': 1} {'type': 'loss', 'content': 0.19482503831386566, 'timestamp': '2025-10-01 04:25:29.646738', 'step': 6815, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:29.713082', 'step': 6815, 'epoch': 1} {'type': 'loss', 'content': 0.10264188796281815, 'timestamp': '2025-10-01 04:25:29.719669', 'step': 6816, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:29.773615', 'step': 6816, 'epoch': 1} {'type': 'loss', 'content': 0.16429845988750458, 'timestamp': '2025-10-01 04:25:29.776299', 'step': 6817, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:29.829778', 'step': 6817, 'epoch': 1} {'type': 'loss', 'content': 0.18555863201618195, 'timestamp': '2025-10-01 04:25:29.832124', 'step': 6818, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:29.886173', 'step': 6818, 'epoch': 1} {'type': 'loss', 'content': 0.22564643621444702, 'timestamp': '2025-10-01 04:25:29.888921', 'step': 6819, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:29.943552', 'step': 6819, 'epoch': 1} {'type': 'loss', 'content': 0.0969744324684143, 'timestamp': '2025-10-01 04:25:29.962520', 'step': 6820, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:30.024086', 'step': 6820, 'epoch': 1} {'type': 'loss', 'content': 0.17481745779514313, 'timestamp': '2025-10-01 04:25:30.026965', 'step': 6821, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:30.080703', 'step': 6821, 'epoch': 1} {'type': 'loss', 'content': 0.1194465383887291, 'timestamp': '2025-10-01 04:25:30.083877', 'step': 6822, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:25:30.138653', 'step': 6822, 'epoch': 1} {'type': 'loss', 'content': 0.11275793612003326, 'timestamp': '2025-10-01 04:25:30.142391', 'step': 6823, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:25:30.196318', 'step': 6823, 'epoch': 1} {'type': 'loss', 'content': 0.17191873490810394, 'timestamp': '2025-10-01 04:25:30.202720', 'step': 6824, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:30.256039', 'step': 6824, 'epoch': 1} {'type': 'loss', 'content': 0.18007135391235352, 'timestamp': '2025-10-01 04:25:30.270830', 'step': 6825, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:30.325301', 'step': 6825, 'epoch': 1} {'type': 'loss', 'content': 0.20279419422149658, 'timestamp': '2025-10-01 04:25:30.328095', 'step': 6826, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:30.384587', 'step': 6826, 'epoch': 1} {'type': 'loss', 'content': 0.22322602570056915, 'timestamp': '2025-10-01 04:25:30.387772', 'step': 6827, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:25:30.443454', 'step': 6827, 'epoch': 1} {'type': 'loss', 'content': 0.10317628085613251, 'timestamp': '2025-10-01 04:25:30.450156', 'step': 6828, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:30.504551', 'step': 6828, 'epoch': 1} {'type': 'loss', 'content': 0.11962622404098511, 'timestamp': '2025-10-01 04:25:30.507365', 'step': 6829, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:25:30.569835', 'step': 6829, 'epoch': 1} {'type': 'loss', 'content': 0.09915605187416077, 'timestamp': '2025-10-01 04:25:30.572251', 'step': 6830, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:30.626695', 'step': 6830, 'epoch': 1} {'type': 'loss', 'content': 0.2412489354610443, 'timestamp': '2025-10-01 04:25:30.629430', 'step': 6831, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:30.694798', 'step': 6831, 'epoch': 1} {'type': 'loss', 'content': 0.09530927985906601, 'timestamp': '2025-10-01 04:25:30.701207', 'step': 6832, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:30.754541', 'step': 6832, 'epoch': 1} {'type': 'loss', 'content': 0.2078869789838791, 'timestamp': '2025-10-01 04:25:30.756845', 'step': 6833, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:25:30.810875', 'step': 6833, 'epoch': 1} {'type': 'loss', 'content': 0.13779573142528534, 'timestamp': '2025-10-01 04:25:30.813282', 'step': 6834, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:30.868264', 'step': 6834, 'epoch': 1} {'type': 'loss', 'content': 0.1288365125656128, 'timestamp': '2025-10-01 04:25:30.871803', 'step': 6835, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:30.937318', 'step': 6835, 'epoch': 1} {'type': 'loss', 'content': 0.15244923532009125, 'timestamp': '2025-10-01 04:25:30.944094', 'step': 6836, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:25:31.010719', 'step': 6836, 'epoch': 1} {'type': 'loss', 'content': 0.1868995875120163, 'timestamp': '2025-10-01 04:25:31.013140', 'step': 6837, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:31.068923', 'step': 6837, 'epoch': 1} {'type': 'loss', 'content': 0.12346099317073822, 'timestamp': '2025-10-01 04:25:31.085216', 'step': 6838, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:25:31.141808', 'step': 6838, 'epoch': 1} {'type': 'loss', 'content': 0.1660398244857788, 'timestamp': '2025-10-01 04:25:31.144421', 'step': 6839, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:31.197835', 'step': 6839, 'epoch': 1} {'type': 'loss', 'content': 0.13710547983646393, 'timestamp': '2025-10-01 04:25:31.204901', 'step': 6840, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:25:31.258246', 'step': 6840, 'epoch': 1} {'type': 'loss', 'content': 0.11305373907089233, 'timestamp': '2025-10-01 04:25:31.260619', 'step': 6841, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:31.314141', 'step': 6841, 'epoch': 1} {'type': 'loss', 'content': 0.14643612504005432, 'timestamp': '2025-10-01 04:25:31.317360', 'step': 6842, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:31.371374', 'step': 6842, 'epoch': 1} {'type': 'loss', 'content': 0.14458546042442322, 'timestamp': '2025-10-01 04:25:31.373726', 'step': 6843, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:31.428584', 'step': 6843, 'epoch': 1} {'type': 'loss', 'content': 0.13011205196380615, 'timestamp': '2025-10-01 04:25:31.435265', 'step': 6844, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:31.489773', 'step': 6844, 'epoch': 1} {'type': 'loss', 'content': 0.15047527849674225, 'timestamp': '2025-10-01 04:25:31.492365', 'step': 6845, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:31.545781', 'step': 6845, 'epoch': 1} {'type': 'loss', 'content': 0.09684781730175018, 'timestamp': '2025-10-01 04:25:31.548048', 'step': 6846, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:31.601153', 'step': 6846, 'epoch': 1} {'type': 'loss', 'content': 0.12193483859300613, 'timestamp': '2025-10-01 04:25:31.603554', 'step': 6847, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:31.656496', 'step': 6847, 'epoch': 1} {'type': 'loss', 'content': 0.12285519391298294, 'timestamp': '2025-10-01 04:25:31.663428', 'step': 6848, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:31.729397', 'step': 6848, 'epoch': 1} {'type': 'loss', 'content': 0.15011662244796753, 'timestamp': '2025-10-01 04:25:31.731988', 'step': 6849, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:31.800401', 'step': 6849, 'epoch': 1} {'type': 'loss', 'content': 0.0917452871799469, 'timestamp': '2025-10-01 04:25:31.802882', 'step': 6850, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:31.856429', 'step': 6850, 'epoch': 1} {'type': 'loss', 'content': 0.13116300106048584, 'timestamp': '2025-10-01 04:25:31.858699', 'step': 6851, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:31.913664', 'step': 6851, 'epoch': 1} {'type': 'loss', 'content': 0.13112008571624756, 'timestamp': '2025-10-01 04:25:31.919986', 'step': 6852, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:31.974584', 'step': 6852, 'epoch': 1} {'type': 'loss', 'content': 0.14669682085514069, 'timestamp': '2025-10-01 04:25:31.976908', 'step': 6853, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:25:32.030711', 'step': 6853, 'epoch': 1} {'type': 'loss', 'content': 0.21325217187404633, 'timestamp': '2025-10-01 04:25:32.033106', 'step': 6854, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:32.086955', 'step': 6854, 'epoch': 1} {'type': 'loss', 'content': 0.18444806337356567, 'timestamp': '2025-10-01 04:25:32.089844', 'step': 6855, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:32.158441', 'step': 6855, 'epoch': 1} {'type': 'loss', 'content': 0.19970211386680603, 'timestamp': '2025-10-01 04:25:32.164916', 'step': 6856, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:32.219708', 'step': 6856, 'epoch': 1} {'type': 'loss', 'content': 0.09632239490747452, 'timestamp': '2025-10-01 04:25:32.223103', 'step': 6857, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:32.288095', 'step': 6857, 'epoch': 1} {'type': 'loss', 'content': 0.10105966031551361, 'timestamp': '2025-10-01 04:25:32.290908', 'step': 6858, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:25:32.356653', 'step': 6858, 'epoch': 1} {'type': 'loss', 'content': 0.1072857677936554, 'timestamp': '2025-10-01 04:25:32.370337', 'step': 6859, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:32.436040', 'step': 6859, 'epoch': 1} {'type': 'loss', 'content': 0.1129588708281517, 'timestamp': '2025-10-01 04:25:32.442292', 'step': 6860, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:32.504964', 'step': 6860, 'epoch': 1} {'type': 'loss', 'content': 0.1608821302652359, 'timestamp': '2025-10-01 04:25:32.507373', 'step': 6861, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:32.564622', 'step': 6861, 'epoch': 1} {'type': 'loss', 'content': 0.15742208063602448, 'timestamp': '2025-10-01 04:25:32.566764', 'step': 6862, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:32.622483', 'step': 6862, 'epoch': 1} {'type': 'loss', 'content': 0.2504418194293976, 'timestamp': '2025-10-01 04:25:32.625165', 'step': 6863, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:32.678828', 'step': 6863, 'epoch': 1} {'type': 'loss', 'content': 0.19278542697429657, 'timestamp': '2025-10-01 04:25:32.684916', 'step': 6864, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:32.738239', 'step': 6864, 'epoch': 1} {'type': 'loss', 'content': 0.12349503487348557, 'timestamp': '2025-10-01 04:25:32.741907', 'step': 6865, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:32.800392', 'step': 6865, 'epoch': 1} {'type': 'loss', 'content': 0.13371358811855316, 'timestamp': '2025-10-01 04:25:32.802856', 'step': 6866, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:32.857813', 'step': 6866, 'epoch': 1} {'type': 'loss', 'content': 0.13712893426418304, 'timestamp': '2025-10-01 04:25:32.861784', 'step': 6867, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:32.917480', 'step': 6867, 'epoch': 1} {'type': 'loss', 'content': 0.11948657035827637, 'timestamp': '2025-10-01 04:25:32.924124', 'step': 6868, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:32.979381', 'step': 6868, 'epoch': 1} {'type': 'loss', 'content': 0.15900422632694244, 'timestamp': '2025-10-01 04:25:32.982389', 'step': 6869, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:33.051380', 'step': 6869, 'epoch': 1} {'type': 'loss', 'content': 0.15270154178142548, 'timestamp': '2025-10-01 04:25:33.055334', 'step': 6870, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:33.109024', 'step': 6870, 'epoch': 1} {'type': 'loss', 'content': 0.17807962000370026, 'timestamp': '2025-10-01 04:25:33.111611', 'step': 6871, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:33.170223', 'step': 6871, 'epoch': 1} {'type': 'loss', 'content': 0.17171840369701385, 'timestamp': '2025-10-01 04:25:33.177866', 'step': 6872, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:33.232282', 'step': 6872, 'epoch': 1} {'type': 'loss', 'content': 0.16126210987567902, 'timestamp': '2025-10-01 04:25:33.234693', 'step': 6873, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:25:33.288605', 'step': 6873, 'epoch': 1} {'type': 'loss', 'content': 0.1574552208185196, 'timestamp': '2025-10-01 04:25:33.292120', 'step': 6874, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:33.346561', 'step': 6874, 'epoch': 1} {'type': 'loss', 'content': 0.23184435069561005, 'timestamp': '2025-10-01 04:25:33.349060', 'step': 6875, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:25:33.402676', 'step': 6875, 'epoch': 1} {'type': 'loss', 'content': 0.14974455535411835, 'timestamp': '2025-10-01 04:25:33.409129', 'step': 6876, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:33.474772', 'step': 6876, 'epoch': 1} {'type': 'loss', 'content': 0.14325740933418274, 'timestamp': '2025-10-01 04:25:33.477870', 'step': 6877, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:33.544469', 'step': 6877, 'epoch': 1} {'type': 'loss', 'content': 0.20646841824054718, 'timestamp': '2025-10-01 04:25:33.546934', 'step': 6878, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:33.600855', 'step': 6878, 'epoch': 1} {'type': 'loss', 'content': 0.16606394946575165, 'timestamp': '2025-10-01 04:25:33.603460', 'step': 6879, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:33.671145', 'step': 6879, 'epoch': 1} {'type': 'loss', 'content': 0.1327132284641266, 'timestamp': '2025-10-01 04:25:33.677414', 'step': 6880, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:25:33.762702', 'step': 6880, 'epoch': 1} {'type': 'loss', 'content': 0.14699234068393707, 'timestamp': '2025-10-01 04:25:33.765231', 'step': 6881, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:33.822190', 'step': 6881, 'epoch': 1} {'type': 'loss', 'content': 0.287131130695343, 'timestamp': '2025-10-01 04:25:33.824692', 'step': 6882, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:33.881749', 'step': 6882, 'epoch': 1} {'type': 'loss', 'content': 0.12817341089248657, 'timestamp': '2025-10-01 04:25:33.884305', 'step': 6883, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:25:33.940414', 'step': 6883, 'epoch': 1} {'type': 'loss', 'content': 0.16102325916290283, 'timestamp': '2025-10-01 04:25:33.947093', 'step': 6884, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:34.003171', 'step': 6884, 'epoch': 1} {'type': 'loss', 'content': 0.13865484297275543, 'timestamp': '2025-10-01 04:25:34.017052', 'step': 6885, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:34.096107', 'step': 6885, 'epoch': 1} {'type': 'loss', 'content': 0.1092013493180275, 'timestamp': '2025-10-01 04:25:34.098464', 'step': 6886, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:34.165040', 'step': 6886, 'epoch': 1} {'type': 'loss', 'content': 0.14062325656414032, 'timestamp': '2025-10-01 04:25:34.167333', 'step': 6887, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:25:34.223655', 'step': 6887, 'epoch': 1} {'type': 'loss', 'content': 0.11579778790473938, 'timestamp': '2025-10-01 04:25:34.234501', 'step': 6888, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:34.311154', 'step': 6888, 'epoch': 1} {'type': 'loss', 'content': 0.10174795985221863, 'timestamp': '2025-10-01 04:25:34.326037', 'step': 6889, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:34.381216', 'step': 6889, 'epoch': 1} {'type': 'loss', 'content': 0.11096996814012527, 'timestamp': '2025-10-01 04:25:34.385979', 'step': 6890, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:34.444444', 'step': 6890, 'epoch': 1} {'type': 'loss', 'content': 0.14407840371131897, 'timestamp': '2025-10-01 04:25:34.446876', 'step': 6891, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:34.506991', 'step': 6891, 'epoch': 1} {'type': 'loss', 'content': 0.177244171500206, 'timestamp': '2025-10-01 04:25:34.516222', 'step': 6892, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:34.572913', 'step': 6892, 'epoch': 1} {'type': 'loss', 'content': 0.22878174483776093, 'timestamp': '2025-10-01 04:25:34.575072', 'step': 6893, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:34.630839', 'step': 6893, 'epoch': 1} {'type': 'loss', 'content': 0.09899448603391647, 'timestamp': '2025-10-01 04:25:34.633391', 'step': 6894, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:34.689153', 'step': 6894, 'epoch': 1} {'type': 'loss', 'content': 0.17370754480361938, 'timestamp': '2025-10-01 04:25:34.702940', 'step': 6895, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:34.768863', 'step': 6895, 'epoch': 1} {'type': 'loss', 'content': 0.20236055552959442, 'timestamp': '2025-10-01 04:25:34.775505', 'step': 6896, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:34.829334', 'step': 6896, 'epoch': 1} {'type': 'loss', 'content': 0.130166694521904, 'timestamp': '2025-10-01 04:25:34.831780', 'step': 6897, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:34.884613', 'step': 6897, 'epoch': 1} {'type': 'loss', 'content': 0.16223382949829102, 'timestamp': '2025-10-01 04:25:34.886843', 'step': 6898, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:34.939608', 'step': 6898, 'epoch': 1} {'type': 'loss', 'content': 0.21921470761299133, 'timestamp': '2025-10-01 04:25:34.942369', 'step': 6899, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:25:34.995805', 'step': 6899, 'epoch': 1} {'type': 'loss', 'content': 0.15703758597373962, 'timestamp': '2025-10-01 04:25:35.002319', 'step': 6900, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:25:35.054680', 'step': 6900, 'epoch': 1} {'type': 'loss', 'content': 0.12765540182590485, 'timestamp': '2025-10-01 04:25:35.056836', 'step': 6901, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:35.109293', 'step': 6901, 'epoch': 1} {'type': 'loss', 'content': 0.300142377614975, 'timestamp': '2025-10-01 04:25:35.111824', 'step': 6902, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:25:35.166828', 'step': 6902, 'epoch': 1} {'type': 'loss', 'content': 0.22589296102523804, 'timestamp': '2025-10-01 04:25:35.168766', 'step': 6903, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:35.221441', 'step': 6903, 'epoch': 1} {'type': 'loss', 'content': 0.12782439589500427, 'timestamp': '2025-10-01 04:25:35.227277', 'step': 6904, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:35.279500', 'step': 6904, 'epoch': 1} {'type': 'loss', 'content': 0.206083744764328, 'timestamp': '2025-10-01 04:25:35.281836', 'step': 6905, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:35.347020', 'step': 6905, 'epoch': 1} {'type': 'loss', 'content': 0.21077680587768555, 'timestamp': '2025-10-01 04:25:35.349916', 'step': 6906, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:25:35.408107', 'step': 6906, 'epoch': 1} {'type': 'loss', 'content': 0.16300560534000397, 'timestamp': '2025-10-01 04:25:35.411819', 'step': 6907, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:35.467528', 'step': 6907, 'epoch': 1} {'type': 'loss', 'content': 0.10034175217151642, 'timestamp': '2025-10-01 04:25:35.473507', 'step': 6908, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:35.526362', 'step': 6908, 'epoch': 1} {'type': 'loss', 'content': 0.14182816445827484, 'timestamp': '2025-10-01 04:25:35.529878', 'step': 6909, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:35.585894', 'step': 6909, 'epoch': 1} {'type': 'loss', 'content': 0.2005361020565033, 'timestamp': '2025-10-01 04:25:35.598942', 'step': 6910, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:35.655018', 'step': 6910, 'epoch': 1} {'type': 'loss', 'content': 0.1563485711812973, 'timestamp': '2025-10-01 04:25:35.657341', 'step': 6911, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:35.710675', 'step': 6911, 'epoch': 1} {'type': 'loss', 'content': 0.12523137032985687, 'timestamp': '2025-10-01 04:25:35.717378', 'step': 6912, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:35.770938', 'step': 6912, 'epoch': 1} {'type': 'loss', 'content': 0.15644842386245728, 'timestamp': '2025-10-01 04:25:35.774891', 'step': 6913, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:35.829042', 'step': 6913, 'epoch': 1} {'type': 'loss', 'content': 0.15167421102523804, 'timestamp': '2025-10-01 04:25:35.832728', 'step': 6914, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:35.885910', 'step': 6914, 'epoch': 1} {'type': 'loss', 'content': 0.206713005900383, 'timestamp': '2025-10-01 04:25:35.888362', 'step': 6915, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:35.941191', 'step': 6915, 'epoch': 1} {'type': 'loss', 'content': 0.1062370166182518, 'timestamp': '2025-10-01 04:25:35.960759', 'step': 6916, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:25:36.013251', 'step': 6916, 'epoch': 1} {'type': 'loss', 'content': 0.10205189138650894, 'timestamp': '2025-10-01 04:25:36.015552', 'step': 6917, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:36.070640', 'step': 6917, 'epoch': 1} {'type': 'loss', 'content': 0.0813722014427185, 'timestamp': '2025-10-01 04:25:36.073152', 'step': 6918, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:25:36.129723', 'step': 6918, 'epoch': 1} {'type': 'loss', 'content': 0.15779747068881989, 'timestamp': '2025-10-01 04:25:36.132176', 'step': 6919, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:36.186130', 'step': 6919, 'epoch': 1} {'type': 'loss', 'content': 0.11048661172389984, 'timestamp': '2025-10-01 04:25:36.196981', 'step': 6920, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:36.252612', 'step': 6920, 'epoch': 1} {'type': 'loss', 'content': 0.10863812267780304, 'timestamp': '2025-10-01 04:25:36.254779', 'step': 6921, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:36.314266', 'step': 6921, 'epoch': 1} {'type': 'loss', 'content': 0.12994126975536346, 'timestamp': '2025-10-01 04:25:36.316601', 'step': 6922, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:36.369585', 'step': 6922, 'epoch': 1} {'type': 'loss', 'content': 0.17797695100307465, 'timestamp': '2025-10-01 04:25:36.371783', 'step': 6923, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:36.425020', 'step': 6923, 'epoch': 1} {'type': 'loss', 'content': 0.12894833087921143, 'timestamp': '2025-10-01 04:25:36.430697', 'step': 6924, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:36.494463', 'step': 6924, 'epoch': 1} {'type': 'loss', 'content': 0.12963849306106567, 'timestamp': '2025-10-01 04:25:36.496694', 'step': 6925, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:25:36.550032', 'step': 6925, 'epoch': 1} {'type': 'loss', 'content': 0.1553545594215393, 'timestamp': '2025-10-01 04:25:36.552390', 'step': 6926, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:36.606159', 'step': 6926, 'epoch': 1} {'type': 'loss', 'content': 0.18692569434642792, 'timestamp': '2025-10-01 04:25:36.608456', 'step': 6927, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:36.661301', 'step': 6927, 'epoch': 1} {'type': 'loss', 'content': 0.1790899932384491, 'timestamp': '2025-10-01 04:25:36.676267', 'step': 6928, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:36.729489', 'step': 6928, 'epoch': 1} {'type': 'loss', 'content': 0.1537422090768814, 'timestamp': '2025-10-01 04:25:36.733309', 'step': 6929, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:36.794017', 'step': 6929, 'epoch': 1} {'type': 'loss', 'content': 0.18396136164665222, 'timestamp': '2025-10-01 04:25:36.796419', 'step': 6930, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:36.849249', 'step': 6930, 'epoch': 1} {'type': 'loss', 'content': 0.11041881889104843, 'timestamp': '2025-10-01 04:25:36.851823', 'step': 6931, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:36.916605', 'step': 6931, 'epoch': 1} {'type': 'loss', 'content': 0.08105766773223877, 'timestamp': '2025-10-01 04:25:36.922488', 'step': 6932, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:25:36.975228', 'step': 6932, 'epoch': 1} {'type': 'loss', 'content': 0.07385856658220291, 'timestamp': '2025-10-01 04:25:36.977803', 'step': 6933, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:37.031115', 'step': 6933, 'epoch': 1} {'type': 'loss', 'content': 0.14559976756572723, 'timestamp': '2025-10-01 04:25:37.033503', 'step': 6934, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:37.086895', 'step': 6934, 'epoch': 1} {'type': 'loss', 'content': 0.11941524595022202, 'timestamp': '2025-10-01 04:25:37.089353', 'step': 6935, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:37.142688', 'step': 6935, 'epoch': 1} {'type': 'loss', 'content': 0.1347363144159317, 'timestamp': '2025-10-01 04:25:37.148519', 'step': 6936, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:37.201944', 'step': 6936, 'epoch': 1} {'type': 'loss', 'content': 0.11441263556480408, 'timestamp': '2025-10-01 04:25:37.204175', 'step': 6937, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:37.257582', 'step': 6937, 'epoch': 1} {'type': 'loss', 'content': 0.10509372502565384, 'timestamp': '2025-10-01 04:25:37.259960', 'step': 6938, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:25:37.313416', 'step': 6938, 'epoch': 1} {'type': 'loss', 'content': 0.18150721490383148, 'timestamp': '2025-10-01 04:25:37.315895', 'step': 6939, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:37.370286', 'step': 6939, 'epoch': 1} {'type': 'loss', 'content': 0.1800721287727356, 'timestamp': '2025-10-01 04:25:37.376463', 'step': 6940, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:37.429274', 'step': 6940, 'epoch': 1} {'type': 'loss', 'content': 0.17837895452976227, 'timestamp': '2025-10-01 04:25:37.431462', 'step': 6941, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:37.484289', 'step': 6941, 'epoch': 1} {'type': 'loss', 'content': 0.17676769196987152, 'timestamp': '2025-10-01 04:25:37.486643', 'step': 6942, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:37.541799', 'step': 6942, 'epoch': 1} {'type': 'loss', 'content': 0.15601098537445068, 'timestamp': '2025-10-01 04:25:37.544264', 'step': 6943, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:37.598187', 'step': 6943, 'epoch': 1} {'type': 'loss', 'content': 0.1099642887711525, 'timestamp': '2025-10-01 04:25:37.604558', 'step': 6944, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:37.658342', 'step': 6944, 'epoch': 1} {'type': 'loss', 'content': 0.1873854696750641, 'timestamp': '2025-10-01 04:25:37.661504', 'step': 6945, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:37.715538', 'step': 6945, 'epoch': 1} {'type': 'loss', 'content': 0.1200934648513794, 'timestamp': '2025-10-01 04:25:37.718163', 'step': 6946, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:37.772047', 'step': 6946, 'epoch': 1} {'type': 'loss', 'content': 0.1471053659915924, 'timestamp': '2025-10-01 04:25:37.774823', 'step': 6947, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:25:37.829479', 'step': 6947, 'epoch': 1} {'type': 'loss', 'content': 0.11348345875740051, 'timestamp': '2025-10-01 04:25:37.835765', 'step': 6948, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:37.889001', 'step': 6948, 'epoch': 1} {'type': 'loss', 'content': 0.14840325713157654, 'timestamp': '2025-10-01 04:25:37.891451', 'step': 6949, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:25:37.955590', 'step': 6949, 'epoch': 1} {'type': 'loss', 'content': 0.2079794555902481, 'timestamp': '2025-10-01 04:25:37.958104', 'step': 6950, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:38.017709', 'step': 6950, 'epoch': 1} {'type': 'loss', 'content': 0.16208942234516144, 'timestamp': '2025-10-01 04:25:38.020402', 'step': 6951, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:38.074717', 'step': 6951, 'epoch': 1} {'type': 'loss', 'content': 0.12696227431297302, 'timestamp': '2025-10-01 04:25:38.081056', 'step': 6952, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:38.137251', 'step': 6952, 'epoch': 1} {'type': 'loss', 'content': 0.15986070036888123, 'timestamp': '2025-10-01 04:25:38.139779', 'step': 6953, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:38.203705', 'step': 6953, 'epoch': 1} {'type': 'loss', 'content': 0.12749603390693665, 'timestamp': '2025-10-01 04:25:38.206589', 'step': 6954, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:25:38.275695', 'step': 6954, 'epoch': 1} {'type': 'loss', 'content': 0.20875480771064758, 'timestamp': '2025-10-01 04:25:38.278450', 'step': 6955, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:38.334328', 'step': 6955, 'epoch': 1} {'type': 'loss', 'content': 0.14351433515548706, 'timestamp': '2025-10-01 04:25:38.340516', 'step': 6956, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:38.394309', 'step': 6956, 'epoch': 1} {'type': 'loss', 'content': 0.2787158191204071, 'timestamp': '2025-10-01 04:25:38.396785', 'step': 6957, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:38.459011', 'step': 6957, 'epoch': 1} {'type': 'loss', 'content': 0.1259092539548874, 'timestamp': '2025-10-01 04:25:38.461532', 'step': 6958, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:38.527596', 'step': 6958, 'epoch': 1} {'type': 'loss', 'content': 0.20187808573246002, 'timestamp': '2025-10-01 04:25:38.530696', 'step': 6959, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:25:38.585648', 'step': 6959, 'epoch': 1} {'type': 'loss', 'content': 0.16046905517578125, 'timestamp': '2025-10-01 04:25:38.592109', 'step': 6960, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:38.645897', 'step': 6960, 'epoch': 1} {'type': 'loss', 'content': 0.18872304260730743, 'timestamp': '2025-10-01 04:25:38.648670', 'step': 6961, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:38.702908', 'step': 6961, 'epoch': 1} {'type': 'loss', 'content': 0.14889374375343323, 'timestamp': '2025-10-01 04:25:38.705286', 'step': 6962, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:38.759192', 'step': 6962, 'epoch': 1} {'type': 'loss', 'content': 0.11830580234527588, 'timestamp': '2025-10-01 04:25:38.762074', 'step': 6963, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:38.815814', 'step': 6963, 'epoch': 1} {'type': 'loss', 'content': 0.224045991897583, 'timestamp': '2025-10-01 04:25:38.831858', 'step': 6964, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:38.885112', 'step': 6964, 'epoch': 1} {'type': 'loss', 'content': 0.1516130268573761, 'timestamp': '2025-10-01 04:25:38.887472', 'step': 6965, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:38.955606', 'step': 6965, 'epoch': 1} {'type': 'loss', 'content': 0.10930747538805008, 'timestamp': '2025-10-01 04:25:38.958553', 'step': 6966, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:39.012895', 'step': 6966, 'epoch': 1} {'type': 'loss', 'content': 0.11980783939361572, 'timestamp': '2025-10-01 04:25:39.015269', 'step': 6967, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:39.068781', 'step': 6967, 'epoch': 1} {'type': 'loss', 'content': 0.18779465556144714, 'timestamp': '2025-10-01 04:25:39.074560', 'step': 6968, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:39.138827', 'step': 6968, 'epoch': 1} {'type': 'loss', 'content': 0.07606092095375061, 'timestamp': '2025-10-01 04:25:39.141309', 'step': 6969, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:39.194948', 'step': 6969, 'epoch': 1} {'type': 'loss', 'content': 0.1361418068408966, 'timestamp': '2025-10-01 04:25:39.197071', 'step': 6970, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:39.250695', 'step': 6970, 'epoch': 1} {'type': 'loss', 'content': 0.14415892958641052, 'timestamp': '2025-10-01 04:25:39.253069', 'step': 6971, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:39.306308', 'step': 6971, 'epoch': 1} {'type': 'loss', 'content': 0.13522650301456451, 'timestamp': '2025-10-01 04:25:39.313234', 'step': 6972, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:25:39.399136', 'step': 6972, 'epoch': 1} {'type': 'loss', 'content': 0.30311664938926697, 'timestamp': '2025-10-01 04:25:39.410563', 'step': 6973, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:39.472204', 'step': 6973, 'epoch': 1} {'type': 'loss', 'content': 0.1743989735841751, 'timestamp': '2025-10-01 04:25:39.474386', 'step': 6974, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:39.528260', 'step': 6974, 'epoch': 1} {'type': 'loss', 'content': 0.18844449520111084, 'timestamp': '2025-10-01 04:25:39.531075', 'step': 6975, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:39.584165', 'step': 6975, 'epoch': 1} {'type': 'loss', 'content': 0.15394257009029388, 'timestamp': '2025-10-01 04:25:39.590150', 'step': 6976, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:39.658676', 'step': 6976, 'epoch': 1} {'type': 'loss', 'content': 0.21190568804740906, 'timestamp': '2025-10-01 04:25:39.660863', 'step': 6977, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:39.716497', 'step': 6977, 'epoch': 1} {'type': 'loss', 'content': 0.25471097230911255, 'timestamp': '2025-10-01 04:25:39.718889', 'step': 6978, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:39.782440', 'step': 6978, 'epoch': 1} {'type': 'loss', 'content': 0.11258599907159805, 'timestamp': '2025-10-01 04:25:39.784818', 'step': 6979, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:39.840576', 'step': 6979, 'epoch': 1} {'type': 'loss', 'content': 0.1527116745710373, 'timestamp': '2025-10-01 04:25:39.847320', 'step': 6980, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:39.901569', 'step': 6980, 'epoch': 1} {'type': 'loss', 'content': 0.12977078557014465, 'timestamp': '2025-10-01 04:25:39.903914', 'step': 6981, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:39.956693', 'step': 6981, 'epoch': 1} {'type': 'loss', 'content': 0.07983041554689407, 'timestamp': '2025-10-01 04:25:39.960278', 'step': 6982, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:40.028196', 'step': 6982, 'epoch': 1} {'type': 'loss', 'content': 0.17948420345783234, 'timestamp': '2025-10-01 04:25:40.031951', 'step': 6983, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:40.084769', 'step': 6983, 'epoch': 1} {'type': 'loss', 'content': 0.11083995550870895, 'timestamp': '2025-10-01 04:25:40.090513', 'step': 6984, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:40.143377', 'step': 6984, 'epoch': 1} {'type': 'loss', 'content': 0.1198246031999588, 'timestamp': '2025-10-01 04:25:40.145621', 'step': 6985, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:25:40.212484', 'step': 6985, 'epoch': 1} {'type': 'loss', 'content': 0.2405095100402832, 'timestamp': '2025-10-01 04:25:40.214933', 'step': 6986, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:40.268700', 'step': 6986, 'epoch': 1} {'type': 'loss', 'content': 0.20400860905647278, 'timestamp': '2025-10-01 04:25:40.273685', 'step': 6987, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:40.329573', 'step': 6987, 'epoch': 1} {'type': 'loss', 'content': 0.11554202437400818, 'timestamp': '2025-10-01 04:25:40.336341', 'step': 6988, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:40.404282', 'step': 6988, 'epoch': 1} {'type': 'loss', 'content': 0.12342320382595062, 'timestamp': '2025-10-01 04:25:40.407063', 'step': 6989, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:40.460802', 'step': 6989, 'epoch': 1} {'type': 'loss', 'content': 0.20311710238456726, 'timestamp': '2025-10-01 04:25:40.463958', 'step': 6990, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-01 04:25:54.424635', 'step': 6990, 'epoch': 1} {'type': 'pplx', 'content': 10715.07973958472, 'timestamp': '2025-10-01 04:25:54.427861', 'step': 6990, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:54.496479', 'step': 6990, 'epoch': 1} {'type': 'loss', 'content': 0.1833467334508896, 'timestamp': '2025-10-01 04:25:54.499323', 'step': 6991, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:54.555353', 'step': 6991, 'epoch': 1} {'type': 'loss', 'content': 0.15806911885738373, 'timestamp': '2025-10-01 04:25:54.561698', 'step': 6992, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:54.615578', 'step': 6992, 'epoch': 1} {'type': 'loss', 'content': 0.19078154861927032, 'timestamp': '2025-10-01 04:25:54.618011', 'step': 6993, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:25:54.673197', 'step': 6993, 'epoch': 1} {'type': 'loss', 'content': 0.10988015681505203, 'timestamp': '2025-10-01 04:25:54.675555', 'step': 6994, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:54.731111', 'step': 6994, 'epoch': 1} {'type': 'loss', 'content': 0.24184823036193848, 'timestamp': '2025-10-01 04:25:54.733721', 'step': 6995, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:54.799354', 'step': 6995, 'epoch': 1} {'type': 'loss', 'content': 0.08840521425008774, 'timestamp': '2025-10-01 04:25:54.819162', 'step': 6996, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:25:54.873936', 'step': 6996, 'epoch': 1} {'type': 'loss', 'content': 0.1405194103717804, 'timestamp': '2025-10-01 04:25:54.877297', 'step': 6997, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:54.931500', 'step': 6997, 'epoch': 1} {'type': 'loss', 'content': 0.21500784158706665, 'timestamp': '2025-10-01 04:25:54.933658', 'step': 6998, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:54.988012', 'step': 6998, 'epoch': 1} {'type': 'loss', 'content': 0.11564319580793381, 'timestamp': '2025-10-01 04:25:54.990272', 'step': 6999, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:55.061806', 'step': 6999, 'epoch': 1} {'type': 'loss', 'content': 0.20141151547431946, 'timestamp': '2025-10-01 04:25:55.068035', 'step': 7000, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 7000', 'timestamp': '2025-10-01 04:25:55.441667', 'step': 7000, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:55.499970', 'step': 7000, 'epoch': 1} {'type': 'loss', 'content': 0.09873057901859283, 'timestamp': '2025-10-01 04:25:55.502570', 'step': 7001, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:55.557320', 'step': 7001, 'epoch': 1} {'type': 'loss', 'content': 0.1745479553937912, 'timestamp': '2025-10-01 04:25:55.559599', 'step': 7002, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:55.623249', 'step': 7002, 'epoch': 1} {'type': 'loss', 'content': 0.286222368478775, 'timestamp': '2025-10-01 04:25:55.625541', 'step': 7003, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:55.680615', 'step': 7003, 'epoch': 1} {'type': 'loss', 'content': 0.15877549350261688, 'timestamp': '2025-10-01 04:25:55.687029', 'step': 7004, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:55.740921', 'step': 7004, 'epoch': 1} {'type': 'loss', 'content': 0.14625394344329834, 'timestamp': '2025-10-01 04:25:55.743405', 'step': 7005, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:55.797880', 'step': 7005, 'epoch': 1} {'type': 'loss', 'content': 0.13708631694316864, 'timestamp': '2025-10-01 04:25:55.812494', 'step': 7006, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:55.865884', 'step': 7006, 'epoch': 1} {'type': 'loss', 'content': 0.14504623413085938, 'timestamp': '2025-10-01 04:25:55.868091', 'step': 7007, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:55.921448', 'step': 7007, 'epoch': 1} {'type': 'loss', 'content': 0.09280535578727722, 'timestamp': '2025-10-01 04:25:55.927470', 'step': 7008, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:55.981179', 'step': 7008, 'epoch': 1} {'type': 'loss', 'content': 0.1699182391166687, 'timestamp': '2025-10-01 04:25:55.983307', 'step': 7009, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:56.037476', 'step': 7009, 'epoch': 1} {'type': 'loss', 'content': 0.12324944883584976, 'timestamp': '2025-10-01 04:25:56.040006', 'step': 7010, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:56.093868', 'step': 7010, 'epoch': 1} {'type': 'loss', 'content': 0.13276658952236176, 'timestamp': '2025-10-01 04:25:56.103396', 'step': 7011, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:56.158390', 'step': 7011, 'epoch': 1} {'type': 'loss', 'content': 0.16359873116016388, 'timestamp': '2025-10-01 04:25:56.176505', 'step': 7012, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:56.229476', 'step': 7012, 'epoch': 1} {'type': 'loss', 'content': 0.225934699177742, 'timestamp': '2025-10-01 04:25:56.235377', 'step': 7013, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:56.289472', 'step': 7013, 'epoch': 1} {'type': 'loss', 'content': 0.22633971273899078, 'timestamp': '2025-10-01 04:25:56.291674', 'step': 7014, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:56.359380', 'step': 7014, 'epoch': 1} {'type': 'loss', 'content': 0.18206264078617096, 'timestamp': '2025-10-01 04:25:56.361822', 'step': 7015, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:56.417531', 'step': 7015, 'epoch': 1} {'type': 'loss', 'content': 0.22409354150295258, 'timestamp': '2025-10-01 04:25:56.423515', 'step': 7016, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:56.492197', 'step': 7016, 'epoch': 1} {'type': 'loss', 'content': 0.23384122550487518, 'timestamp': '2025-10-01 04:25:56.497091', 'step': 7017, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:56.559730', 'step': 7017, 'epoch': 1} {'type': 'loss', 'content': 0.20533278584480286, 'timestamp': '2025-10-01 04:25:56.561735', 'step': 7018, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:56.616277', 'step': 7018, 'epoch': 1} {'type': 'loss', 'content': 0.17274633049964905, 'timestamp': '2025-10-01 04:25:56.618706', 'step': 7019, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:56.673205', 'step': 7019, 'epoch': 1} {'type': 'loss', 'content': 0.08926010876893997, 'timestamp': '2025-10-01 04:25:56.680102', 'step': 7020, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:56.734160', 'step': 7020, 'epoch': 1} {'type': 'loss', 'content': 0.19691957533359528, 'timestamp': '2025-10-01 04:25:56.736465', 'step': 7021, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:56.791481', 'step': 7021, 'epoch': 1} {'type': 'loss', 'content': 0.13418088853359222, 'timestamp': '2025-10-01 04:25:56.793889', 'step': 7022, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:25:56.848480', 'step': 7022, 'epoch': 1} {'type': 'loss', 'content': 0.15528801083564758, 'timestamp': '2025-10-01 04:25:56.851515', 'step': 7023, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:56.907305', 'step': 7023, 'epoch': 1} {'type': 'loss', 'content': 0.0950154960155487, 'timestamp': '2025-10-01 04:25:56.913812', 'step': 7024, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:56.969182', 'step': 7024, 'epoch': 1} {'type': 'loss', 'content': 0.20944933593273163, 'timestamp': '2025-10-01 04:25:56.974396', 'step': 7025, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:57.028910', 'step': 7025, 'epoch': 1} {'type': 'loss', 'content': 0.2660027742385864, 'timestamp': '2025-10-01 04:25:57.031208', 'step': 7026, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:57.085761', 'step': 7026, 'epoch': 1} {'type': 'loss', 'content': 0.16786350309848785, 'timestamp': '2025-10-01 04:25:57.087965', 'step': 7027, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:57.142548', 'step': 7027, 'epoch': 1} {'type': 'loss', 'content': 0.17428246140480042, 'timestamp': '2025-10-01 04:25:57.148950', 'step': 7028, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:25:57.202473', 'step': 7028, 'epoch': 1} {'type': 'loss', 'content': 0.1147666722536087, 'timestamp': '2025-10-01 04:25:57.205116', 'step': 7029, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:57.259033', 'step': 7029, 'epoch': 1} {'type': 'loss', 'content': 0.15034404397010803, 'timestamp': '2025-10-01 04:25:57.261242', 'step': 7030, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:57.315787', 'step': 7030, 'epoch': 1} {'type': 'loss', 'content': 0.07800330966711044, 'timestamp': '2025-10-01 04:25:57.318121', 'step': 7031, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:25:57.371374', 'step': 7031, 'epoch': 1} {'type': 'loss', 'content': 0.103679358959198, 'timestamp': '2025-10-01 04:25:57.377377', 'step': 7032, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:57.440269', 'step': 7032, 'epoch': 1} {'type': 'loss', 'content': 0.13081328570842743, 'timestamp': '2025-10-01 04:25:57.442569', 'step': 7033, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:57.495368', 'step': 7033, 'epoch': 1} {'type': 'loss', 'content': 0.1759527325630188, 'timestamp': '2025-10-01 04:25:57.497600', 'step': 7034, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:57.551028', 'step': 7034, 'epoch': 1} {'type': 'loss', 'content': 0.10803906619548798, 'timestamp': '2025-10-01 04:25:57.553358', 'step': 7035, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:57.606527', 'step': 7035, 'epoch': 1} {'type': 'loss', 'content': 0.13758280873298645, 'timestamp': '2025-10-01 04:25:57.612392', 'step': 7036, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:25:57.665881', 'step': 7036, 'epoch': 1} {'type': 'loss', 'content': 0.17448115348815918, 'timestamp': '2025-10-01 04:25:57.668102', 'step': 7037, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:57.729437', 'step': 7037, 'epoch': 1} {'type': 'loss', 'content': 0.12197776138782501, 'timestamp': '2025-10-01 04:25:57.736094', 'step': 7038, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:25:57.790259', 'step': 7038, 'epoch': 1} {'type': 'loss', 'content': 0.14089368283748627, 'timestamp': '2025-10-01 04:25:57.792629', 'step': 7039, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:57.846136', 'step': 7039, 'epoch': 1} {'type': 'loss', 'content': 0.1715814769268036, 'timestamp': '2025-10-01 04:25:57.852239', 'step': 7040, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:57.913065', 'step': 7040, 'epoch': 1} {'type': 'loss', 'content': 0.11154288798570633, 'timestamp': '2025-10-01 04:25:57.916088', 'step': 7041, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:57.970178', 'step': 7041, 'epoch': 1} {'type': 'loss', 'content': 0.21235457062721252, 'timestamp': '2025-10-01 04:25:57.973521', 'step': 7042, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:58.027268', 'step': 7042, 'epoch': 1} {'type': 'loss', 'content': 0.15638214349746704, 'timestamp': '2025-10-01 04:25:58.029564', 'step': 7043, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:58.084839', 'step': 7043, 'epoch': 1} {'type': 'loss', 'content': 0.15091484785079956, 'timestamp': '2025-10-01 04:25:58.092025', 'step': 7044, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:58.163303', 'step': 7044, 'epoch': 1} {'type': 'loss', 'content': 0.15288862586021423, 'timestamp': '2025-10-01 04:25:58.165947', 'step': 7045, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:58.223851', 'step': 7045, 'epoch': 1} {'type': 'loss', 'content': 0.12590467929840088, 'timestamp': '2025-10-01 04:25:58.226084', 'step': 7046, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:58.280039', 'step': 7046, 'epoch': 1} {'type': 'loss', 'content': 0.20280897617340088, 'timestamp': '2025-10-01 04:25:58.282369', 'step': 7047, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:58.337912', 'step': 7047, 'epoch': 1} {'type': 'loss', 'content': 0.2289142906665802, 'timestamp': '2025-10-01 04:25:58.353438', 'step': 7048, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:58.408326', 'step': 7048, 'epoch': 1} {'type': 'loss', 'content': 0.13667769730091095, 'timestamp': '2025-10-01 04:25:58.411408', 'step': 7049, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:58.467143', 'step': 7049, 'epoch': 1} {'type': 'loss', 'content': 0.1171872541308403, 'timestamp': '2025-10-01 04:25:58.469327', 'step': 7050, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:58.525030', 'step': 7050, 'epoch': 1} {'type': 'loss', 'content': 0.20872065424919128, 'timestamp': '2025-10-01 04:25:58.527163', 'step': 7051, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:25:58.582264', 'step': 7051, 'epoch': 1} {'type': 'loss', 'content': 0.20123408734798431, 'timestamp': '2025-10-01 04:25:58.588690', 'step': 7052, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:58.648110', 'step': 7052, 'epoch': 1} {'type': 'loss', 'content': 0.17608287930488586, 'timestamp': '2025-10-01 04:25:58.662181', 'step': 7053, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:25:58.719381', 'step': 7053, 'epoch': 1} {'type': 'loss', 'content': 0.16215787827968597, 'timestamp': '2025-10-01 04:25:58.721613', 'step': 7054, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:58.777821', 'step': 7054, 'epoch': 1} {'type': 'loss', 'content': 0.17331932485103607, 'timestamp': '2025-10-01 04:25:58.780801', 'step': 7055, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:58.834728', 'step': 7055, 'epoch': 1} {'type': 'loss', 'content': 0.08108584582805634, 'timestamp': '2025-10-01 04:25:58.840630', 'step': 7056, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:58.900216', 'step': 7056, 'epoch': 1} {'type': 'loss', 'content': 0.17815308272838593, 'timestamp': '2025-10-01 04:25:58.902439', 'step': 7057, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:25:58.962597', 'step': 7057, 'epoch': 1} {'type': 'loss', 'content': 0.21825866401195526, 'timestamp': '2025-10-01 04:25:58.965308', 'step': 7058, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:25:59.019865', 'step': 7058, 'epoch': 1} {'type': 'loss', 'content': 0.26212555170059204, 'timestamp': '2025-10-01 04:25:59.022174', 'step': 7059, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:25:59.076590', 'step': 7059, 'epoch': 1} {'type': 'loss', 'content': 0.2991872727870941, 'timestamp': '2025-10-01 04:25:59.083089', 'step': 7060, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:25:59.136124', 'step': 7060, 'epoch': 1} {'type': 'loss', 'content': 0.13829873502254486, 'timestamp': '2025-10-01 04:25:59.139277', 'step': 7061, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:25:59.191999', 'step': 7061, 'epoch': 1} {'type': 'loss', 'content': 0.2920175790786743, 'timestamp': '2025-10-01 04:25:59.194563', 'step': 7062, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:59.248460', 'step': 7062, 'epoch': 1} {'type': 'loss', 'content': 0.2617878317832947, 'timestamp': '2025-10-01 04:25:59.251027', 'step': 7063, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:25:59.305202', 'step': 7063, 'epoch': 1} {'type': 'loss', 'content': 0.216999351978302, 'timestamp': '2025-10-01 04:25:59.311175', 'step': 7064, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:59.365439', 'step': 7064, 'epoch': 1} {'type': 'loss', 'content': 0.1650993525981903, 'timestamp': '2025-10-01 04:25:59.368344', 'step': 7065, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:59.423294', 'step': 7065, 'epoch': 1} {'type': 'loss', 'content': 0.2085759937763214, 'timestamp': '2025-10-01 04:25:59.427292', 'step': 7066, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:25:59.481474', 'step': 7066, 'epoch': 1} {'type': 'loss', 'content': 0.1989879310131073, 'timestamp': '2025-10-01 04:25:59.483857', 'step': 7067, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:59.549267', 'step': 7067, 'epoch': 1} {'type': 'loss', 'content': 0.13266630470752716, 'timestamp': '2025-10-01 04:25:59.555208', 'step': 7068, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:59.610137', 'step': 7068, 'epoch': 1} {'type': 'loss', 'content': 0.16044923663139343, 'timestamp': '2025-10-01 04:25:59.613004', 'step': 7069, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:59.666195', 'step': 7069, 'epoch': 1} {'type': 'loss', 'content': 0.11454255878925323, 'timestamp': '2025-10-01 04:25:59.668490', 'step': 7070, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:59.723081', 'step': 7070, 'epoch': 1} {'type': 'loss', 'content': 0.18745848536491394, 'timestamp': '2025-10-01 04:25:59.725424', 'step': 7071, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:59.782967', 'step': 7071, 'epoch': 1} {'type': 'loss', 'content': 0.15404215455055237, 'timestamp': '2025-10-01 04:25:59.789729', 'step': 7072, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:59.843223', 'step': 7072, 'epoch': 1} {'type': 'loss', 'content': 0.14260417222976685, 'timestamp': '2025-10-01 04:25:59.845499', 'step': 7073, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:25:59.920593', 'step': 7073, 'epoch': 1} {'type': 'loss', 'content': 0.1600269228219986, 'timestamp': '2025-10-01 04:25:59.923167', 'step': 7074, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:25:59.977691', 'step': 7074, 'epoch': 1} {'type': 'loss', 'content': 0.09234879910945892, 'timestamp': '2025-10-01 04:25:59.980106', 'step': 7075, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:00.053287', 'step': 7075, 'epoch': 1} {'type': 'loss', 'content': 0.09244733303785324, 'timestamp': '2025-10-01 04:26:00.065681', 'step': 7076, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:00.160310', 'step': 7076, 'epoch': 1} {'type': 'loss', 'content': 0.09611224383115768, 'timestamp': '2025-10-01 04:26:00.162574', 'step': 7077, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:26:00.218178', 'step': 7077, 'epoch': 1} {'type': 'loss', 'content': 0.1669916808605194, 'timestamp': '2025-10-01 04:26:00.220318', 'step': 7078, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:00.274678', 'step': 7078, 'epoch': 1} {'type': 'loss', 'content': 0.19523799419403076, 'timestamp': '2025-10-01 04:26:00.277597', 'step': 7079, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:00.338504', 'step': 7079, 'epoch': 1} {'type': 'loss', 'content': 0.08398056030273438, 'timestamp': '2025-10-01 04:26:00.344598', 'step': 7080, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:00.412800', 'step': 7080, 'epoch': 1} {'type': 'loss', 'content': 0.1824575513601303, 'timestamp': '2025-10-01 04:26:00.416561', 'step': 7081, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:00.490446', 'step': 7081, 'epoch': 1} {'type': 'loss', 'content': 0.16275696456432343, 'timestamp': '2025-10-01 04:26:00.492815', 'step': 7082, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:00.546712', 'step': 7082, 'epoch': 1} {'type': 'loss', 'content': 0.11222337186336517, 'timestamp': '2025-10-01 04:26:00.550319', 'step': 7083, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:00.604340', 'step': 7083, 'epoch': 1} {'type': 'loss', 'content': 0.16335856914520264, 'timestamp': '2025-10-01 04:26:00.610643', 'step': 7084, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:00.673438', 'step': 7084, 'epoch': 1} {'type': 'loss', 'content': 0.20049233734607697, 'timestamp': '2025-10-01 04:26:00.687203', 'step': 7085, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:00.740936', 'step': 7085, 'epoch': 1} {'type': 'loss', 'content': 0.16878075897693634, 'timestamp': '2025-10-01 04:26:00.743209', 'step': 7086, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:00.809431', 'step': 7086, 'epoch': 1} {'type': 'loss', 'content': 0.20392456650733948, 'timestamp': '2025-10-01 04:26:00.811628', 'step': 7087, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:00.865227', 'step': 7087, 'epoch': 1} {'type': 'loss', 'content': 0.13743092119693756, 'timestamp': '2025-10-01 04:26:00.871407', 'step': 7088, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:00.940441', 'step': 7088, 'epoch': 1} {'type': 'loss', 'content': 0.10982296615839005, 'timestamp': '2025-10-01 04:26:00.942603', 'step': 7089, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:00.995813', 'step': 7089, 'epoch': 1} {'type': 'loss', 'content': 0.06872711330652237, 'timestamp': '2025-10-01 04:26:00.998002', 'step': 7090, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:01.051846', 'step': 7090, 'epoch': 1} {'type': 'loss', 'content': 0.17170941829681396, 'timestamp': '2025-10-01 04:26:01.053831', 'step': 7091, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:01.110739', 'step': 7091, 'epoch': 1} {'type': 'loss', 'content': 0.12152592092752457, 'timestamp': '2025-10-01 04:26:01.116933', 'step': 7092, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:26:01.169698', 'step': 7092, 'epoch': 1} {'type': 'loss', 'content': 0.09620372205972672, 'timestamp': '2025-10-01 04:26:01.172064', 'step': 7093, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:01.240022', 'step': 7093, 'epoch': 1} {'type': 'loss', 'content': 0.13696616888046265, 'timestamp': '2025-10-01 04:26:01.253592', 'step': 7094, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:01.310597', 'step': 7094, 'epoch': 1} {'type': 'loss', 'content': 0.2121374011039734, 'timestamp': '2025-10-01 04:26:01.314260', 'step': 7095, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:01.367853', 'step': 7095, 'epoch': 1} {'type': 'loss', 'content': 0.14427600800991058, 'timestamp': '2025-10-01 04:26:01.375635', 'step': 7096, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:01.429851', 'step': 7096, 'epoch': 1} {'type': 'loss', 'content': 0.14783822000026703, 'timestamp': '2025-10-01 04:26:01.432074', 'step': 7097, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:01.485226', 'step': 7097, 'epoch': 1} {'type': 'loss', 'content': 0.20263630151748657, 'timestamp': '2025-10-01 04:26:01.488913', 'step': 7098, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:01.544781', 'step': 7098, 'epoch': 1} {'type': 'loss', 'content': 0.1271134614944458, 'timestamp': '2025-10-01 04:26:01.547180', 'step': 7099, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:01.619651', 'step': 7099, 'epoch': 1} {'type': 'loss', 'content': 0.1776617169380188, 'timestamp': '2025-10-01 04:26:01.626150', 'step': 7100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:01.680837', 'step': 7100, 'epoch': 1} {'type': 'loss', 'content': 0.17646761238574982, 'timestamp': '2025-10-01 04:26:01.682980', 'step': 7101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:01.753453', 'step': 7101, 'epoch': 1} {'type': 'loss', 'content': 0.2127731293439865, 'timestamp': '2025-10-01 04:26:01.755703', 'step': 7102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:01.809350', 'step': 7102, 'epoch': 1} {'type': 'loss', 'content': 0.14816834032535553, 'timestamp': '2025-10-01 04:26:01.811655', 'step': 7103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:26:01.866079', 'step': 7103, 'epoch': 1} {'type': 'loss', 'content': 0.21980060636997223, 'timestamp': '2025-10-01 04:26:01.872073', 'step': 7104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:01.928112', 'step': 7104, 'epoch': 1} {'type': 'loss', 'content': 0.10196001827716827, 'timestamp': '2025-10-01 04:26:01.930420', 'step': 7105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:01.984504', 'step': 7105, 'epoch': 1} {'type': 'loss', 'content': 0.1614166498184204, 'timestamp': '2025-10-01 04:26:01.986832', 'step': 7106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:02.044182', 'step': 7106, 'epoch': 1} {'type': 'loss', 'content': 0.1286788135766983, 'timestamp': '2025-10-01 04:26:02.046785', 'step': 7107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:26:02.112084', 'step': 7107, 'epoch': 1} {'type': 'loss', 'content': 0.18679389357566833, 'timestamp': '2025-10-01 04:26:02.118995', 'step': 7108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:02.171294', 'step': 7108, 'epoch': 1} {'type': 'loss', 'content': 0.12291742861270905, 'timestamp': '2025-10-01 04:26:02.173776', 'step': 7109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:02.228788', 'step': 7109, 'epoch': 1} {'type': 'loss', 'content': 0.1254764348268509, 'timestamp': '2025-10-01 04:26:02.231230', 'step': 7110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:02.286471', 'step': 7110, 'epoch': 1} {'type': 'loss', 'content': 0.152012899518013, 'timestamp': '2025-10-01 04:26:02.288682', 'step': 7111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:02.342036', 'step': 7111, 'epoch': 1} {'type': 'loss', 'content': 0.19447863101959229, 'timestamp': '2025-10-01 04:26:02.348127', 'step': 7112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:02.401394', 'step': 7112, 'epoch': 1} {'type': 'loss', 'content': 0.1427789032459259, 'timestamp': '2025-10-01 04:26:02.403776', 'step': 7113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:02.465125', 'step': 7113, 'epoch': 1} {'type': 'loss', 'content': 0.10527916997671127, 'timestamp': '2025-10-01 04:26:02.467879', 'step': 7114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:02.521622', 'step': 7114, 'epoch': 1} {'type': 'loss', 'content': 0.19193150103092194, 'timestamp': '2025-10-01 04:26:02.523680', 'step': 7115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:02.578807', 'step': 7115, 'epoch': 1} {'type': 'loss', 'content': 0.09220429509878159, 'timestamp': '2025-10-01 04:26:02.584618', 'step': 7116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:02.637826', 'step': 7116, 'epoch': 1} {'type': 'loss', 'content': 0.20423860847949982, 'timestamp': '2025-10-01 04:26:02.640306', 'step': 7117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:02.693505', 'step': 7117, 'epoch': 1} {'type': 'loss', 'content': 0.24205872416496277, 'timestamp': '2025-10-01 04:26:02.695707', 'step': 7118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:02.763009', 'step': 7118, 'epoch': 1} {'type': 'loss', 'content': 0.0998455211520195, 'timestamp': '2025-10-01 04:26:02.765162', 'step': 7119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:02.818583', 'step': 7119, 'epoch': 1} {'type': 'loss', 'content': 0.1202404797077179, 'timestamp': '2025-10-01 04:26:02.824300', 'step': 7120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:02.877205', 'step': 7120, 'epoch': 1} {'type': 'loss', 'content': 0.1575930416584015, 'timestamp': '2025-10-01 04:26:02.879407', 'step': 7121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:02.932659', 'step': 7121, 'epoch': 1} {'type': 'loss', 'content': 0.11339152604341507, 'timestamp': '2025-10-01 04:26:02.934863', 'step': 7122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:02.988624', 'step': 7122, 'epoch': 1} {'type': 'loss', 'content': 0.14722400903701782, 'timestamp': '2025-10-01 04:26:02.993116', 'step': 7123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:03.048942', 'step': 7123, 'epoch': 1} {'type': 'loss', 'content': 0.1314617544412613, 'timestamp': '2025-10-01 04:26:03.054613', 'step': 7124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:03.107919', 'step': 7124, 'epoch': 1} {'type': 'loss', 'content': 0.10595911741256714, 'timestamp': '2025-10-01 04:26:03.110184', 'step': 7125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:03.164164', 'step': 7125, 'epoch': 1} {'type': 'loss', 'content': 0.10795650631189346, 'timestamp': '2025-10-01 04:26:03.167123', 'step': 7126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:03.220891', 'step': 7126, 'epoch': 1} {'type': 'loss', 'content': 0.12194712460041046, 'timestamp': '2025-10-01 04:26:03.222740', 'step': 7127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:03.275467', 'step': 7127, 'epoch': 1} {'type': 'loss', 'content': 0.24003438651561737, 'timestamp': '2025-10-01 04:26:03.280996', 'step': 7128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:03.334568', 'step': 7128, 'epoch': 1} {'type': 'loss', 'content': 0.14929839968681335, 'timestamp': '2025-10-01 04:26:03.337243', 'step': 7129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:03.398274', 'step': 7129, 'epoch': 1} {'type': 'loss', 'content': 0.22270731627941132, 'timestamp': '2025-10-01 04:26:03.400894', 'step': 7130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:03.456266', 'step': 7130, 'epoch': 1} {'type': 'loss', 'content': 0.10612940043210983, 'timestamp': '2025-10-01 04:26:03.459643', 'step': 7131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:03.515947', 'step': 7131, 'epoch': 1} {'type': 'loss', 'content': 0.18349772691726685, 'timestamp': '2025-10-01 04:26:03.521956', 'step': 7132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:03.576908', 'step': 7132, 'epoch': 1} {'type': 'loss', 'content': 0.16535449028015137, 'timestamp': '2025-10-01 04:26:03.579047', 'step': 7133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:03.633598', 'step': 7133, 'epoch': 1} {'type': 'loss', 'content': 0.12105293571949005, 'timestamp': '2025-10-01 04:26:03.635485', 'step': 7134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:03.690024', 'step': 7134, 'epoch': 1} {'type': 'loss', 'content': 0.17303580045700073, 'timestamp': '2025-10-01 04:26:03.707401', 'step': 7135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:03.778063', 'step': 7135, 'epoch': 1} {'type': 'loss', 'content': 0.14917060732841492, 'timestamp': '2025-10-01 04:26:03.784599', 'step': 7136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:03.839739', 'step': 7136, 'epoch': 1} {'type': 'loss', 'content': 0.10596174001693726, 'timestamp': '2025-10-01 04:26:03.842712', 'step': 7137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:03.899450', 'step': 7137, 'epoch': 1} {'type': 'loss', 'content': 0.10554482042789459, 'timestamp': '2025-10-01 04:26:03.901783', 'step': 7138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:26:03.955879', 'step': 7138, 'epoch': 1} {'type': 'loss', 'content': 0.07321092486381531, 'timestamp': '2025-10-01 04:26:03.965929', 'step': 7139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:04.031335', 'step': 7139, 'epoch': 1} {'type': 'loss', 'content': 0.12476294487714767, 'timestamp': '2025-10-01 04:26:04.037367', 'step': 7140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:04.091908', 'step': 7140, 'epoch': 1} {'type': 'loss', 'content': 0.12306008487939835, 'timestamp': '2025-10-01 04:26:04.093986', 'step': 7141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:04.149084', 'step': 7141, 'epoch': 1} {'type': 'loss', 'content': 0.09735139459371567, 'timestamp': '2025-10-01 04:26:04.151662', 'step': 7142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:04.206786', 'step': 7142, 'epoch': 1} {'type': 'loss', 'content': 0.20646923780441284, 'timestamp': '2025-10-01 04:26:04.209171', 'step': 7143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:04.270178', 'step': 7143, 'epoch': 1} {'type': 'loss', 'content': 0.16020867228507996, 'timestamp': '2025-10-01 04:26:04.276246', 'step': 7144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:04.330808', 'step': 7144, 'epoch': 1} {'type': 'loss', 'content': 0.10226188600063324, 'timestamp': '2025-10-01 04:26:04.333241', 'step': 7145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:04.387384', 'step': 7145, 'epoch': 1} {'type': 'loss', 'content': 0.1904723048210144, 'timestamp': '2025-10-01 04:26:04.390974', 'step': 7146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:04.445789', 'step': 7146, 'epoch': 1} {'type': 'loss', 'content': 0.09638034552335739, 'timestamp': '2025-10-01 04:26:04.457094', 'step': 7147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:04.513199', 'step': 7147, 'epoch': 1} {'type': 'loss', 'content': 0.09140641987323761, 'timestamp': '2025-10-01 04:26:04.518974', 'step': 7148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:04.573831', 'step': 7148, 'epoch': 1} {'type': 'loss', 'content': 0.18683651089668274, 'timestamp': '2025-10-01 04:26:04.576790', 'step': 7149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:04.632129', 'step': 7149, 'epoch': 1} {'type': 'loss', 'content': 0.12937863171100616, 'timestamp': '2025-10-01 04:26:04.634903', 'step': 7150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:04.690154', 'step': 7150, 'epoch': 1} {'type': 'loss', 'content': 0.1363358050584793, 'timestamp': '2025-10-01 04:26:04.692638', 'step': 7151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:04.747311', 'step': 7151, 'epoch': 1} {'type': 'loss', 'content': 0.21947844326496124, 'timestamp': '2025-10-01 04:26:04.753905', 'step': 7152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:04.826164', 'step': 7152, 'epoch': 1} {'type': 'loss', 'content': 0.12084474414587021, 'timestamp': '2025-10-01 04:26:04.828554', 'step': 7153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:04.883892', 'step': 7153, 'epoch': 1} {'type': 'loss', 'content': 0.12534017860889435, 'timestamp': '2025-10-01 04:26:04.886402', 'step': 7154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:04.940524', 'step': 7154, 'epoch': 1} {'type': 'loss', 'content': 0.11620688438415527, 'timestamp': '2025-10-01 04:26:04.943555', 'step': 7155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:04.998755', 'step': 7155, 'epoch': 1} {'type': 'loss', 'content': 0.1129746064543724, 'timestamp': '2025-10-01 04:26:05.009412', 'step': 7156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:05.063084', 'step': 7156, 'epoch': 1} {'type': 'loss', 'content': 0.13245335221290588, 'timestamp': '2025-10-01 04:26:05.065946', 'step': 7157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:26:05.121130', 'step': 7157, 'epoch': 1} {'type': 'loss', 'content': 0.19293031096458435, 'timestamp': '2025-10-01 04:26:05.123615', 'step': 7158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:26:05.178408', 'step': 7158, 'epoch': 1} {'type': 'loss', 'content': 0.21155567467212677, 'timestamp': '2025-10-01 04:26:05.180865', 'step': 7159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:05.236626', 'step': 7159, 'epoch': 1} {'type': 'loss', 'content': 0.1036718487739563, 'timestamp': '2025-10-01 04:26:05.242479', 'step': 7160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:05.296762', 'step': 7160, 'epoch': 1} {'type': 'loss', 'content': 0.2906762361526489, 'timestamp': '2025-10-01 04:26:05.298591', 'step': 7161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:05.352702', 'step': 7161, 'epoch': 1} {'type': 'loss', 'content': 0.16269713640213013, 'timestamp': '2025-10-01 04:26:05.355178', 'step': 7162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:05.413803', 'step': 7162, 'epoch': 1} {'type': 'loss', 'content': 0.18875740468502045, 'timestamp': '2025-10-01 04:26:05.416261', 'step': 7163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:05.473035', 'step': 7163, 'epoch': 1} {'type': 'loss', 'content': 0.14847297966480255, 'timestamp': '2025-10-01 04:26:05.479467', 'step': 7164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:05.533728', 'step': 7164, 'epoch': 1} {'type': 'loss', 'content': 0.13251212239265442, 'timestamp': '2025-10-01 04:26:05.536093', 'step': 7165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:05.590250', 'step': 7165, 'epoch': 1} {'type': 'loss', 'content': 0.11633611470460892, 'timestamp': '2025-10-01 04:26:05.592657', 'step': 7166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:05.647191', 'step': 7166, 'epoch': 1} {'type': 'loss', 'content': 0.26116716861724854, 'timestamp': '2025-10-01 04:26:05.649018', 'step': 7167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:26:05.713624', 'step': 7167, 'epoch': 1} {'type': 'loss', 'content': 0.14562323689460754, 'timestamp': '2025-10-01 04:26:05.719377', 'step': 7168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:05.773374', 'step': 7168, 'epoch': 1} {'type': 'loss', 'content': 0.18742211163043976, 'timestamp': '2025-10-01 04:26:05.777875', 'step': 7169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:26:05.831548', 'step': 7169, 'epoch': 1} {'type': 'loss', 'content': 0.11223690956830978, 'timestamp': '2025-10-01 04:26:05.833912', 'step': 7170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:05.897019', 'step': 7170, 'epoch': 1} {'type': 'loss', 'content': 0.12190235406160355, 'timestamp': '2025-10-01 04:26:05.899486', 'step': 7171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:05.964109', 'step': 7171, 'epoch': 1} {'type': 'loss', 'content': 0.2124122828245163, 'timestamp': '2025-10-01 04:26:05.970277', 'step': 7172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:06.024009', 'step': 7172, 'epoch': 1} {'type': 'loss', 'content': 0.17271500825881958, 'timestamp': '2025-10-01 04:26:06.026186', 'step': 7173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:06.088966', 'step': 7173, 'epoch': 1} {'type': 'loss', 'content': 0.14725323021411896, 'timestamp': '2025-10-01 04:26:06.090776', 'step': 7174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:06.144373', 'step': 7174, 'epoch': 1} {'type': 'loss', 'content': 0.13929131627082825, 'timestamp': '2025-10-01 04:26:06.146443', 'step': 7175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:06.199607', 'step': 7175, 'epoch': 1} {'type': 'loss', 'content': 0.15610595047473907, 'timestamp': '2025-10-01 04:26:06.205724', 'step': 7176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:06.258597', 'step': 7176, 'epoch': 1} {'type': 'loss', 'content': 0.16593722999095917, 'timestamp': '2025-10-01 04:26:06.260523', 'step': 7177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:06.313886', 'step': 7177, 'epoch': 1} {'type': 'loss', 'content': 0.11207693070173264, 'timestamp': '2025-10-01 04:26:06.316302', 'step': 7178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:06.370834', 'step': 7178, 'epoch': 1} {'type': 'loss', 'content': 0.10036902129650116, 'timestamp': '2025-10-01 04:26:06.373031', 'step': 7179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:06.425833', 'step': 7179, 'epoch': 1} {'type': 'loss', 'content': 0.12890759110450745, 'timestamp': '2025-10-01 04:26:06.431809', 'step': 7180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:06.485166', 'step': 7180, 'epoch': 1} {'type': 'loss', 'content': 0.21823056042194366, 'timestamp': '2025-10-01 04:26:06.498377', 'step': 7181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:06.551860', 'step': 7181, 'epoch': 1} {'type': 'loss', 'content': 0.14252543449401855, 'timestamp': '2025-10-01 04:26:06.554250', 'step': 7182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:06.608342', 'step': 7182, 'epoch': 1} {'type': 'loss', 'content': 0.141487255692482, 'timestamp': '2025-10-01 04:26:06.610496', 'step': 7183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:06.665101', 'step': 7183, 'epoch': 1} {'type': 'loss', 'content': 0.2072490155696869, 'timestamp': '2025-10-01 04:26:06.670949', 'step': 7184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:06.723808', 'step': 7184, 'epoch': 1} {'type': 'loss', 'content': 0.1783837080001831, 'timestamp': '2025-10-01 04:26:06.726478', 'step': 7185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:06.780031', 'step': 7185, 'epoch': 1} {'type': 'loss', 'content': 0.09259343892335892, 'timestamp': '2025-10-01 04:26:06.782212', 'step': 7186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:06.837522', 'step': 7186, 'epoch': 1} {'type': 'loss', 'content': 0.1837618052959442, 'timestamp': '2025-10-01 04:26:06.839484', 'step': 7187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:06.895771', 'step': 7187, 'epoch': 1} {'type': 'loss', 'content': 0.14773626625537872, 'timestamp': '2025-10-01 04:26:06.901045', 'step': 7188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:06.955235', 'step': 7188, 'epoch': 1} {'type': 'loss', 'content': 0.260154128074646, 'timestamp': '2025-10-01 04:26:06.957076', 'step': 7189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:07.015931', 'step': 7189, 'epoch': 1} {'type': 'loss', 'content': 0.11915095895528793, 'timestamp': '2025-10-01 04:26:07.018067', 'step': 7190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:07.071605', 'step': 7190, 'epoch': 1} {'type': 'loss', 'content': 0.1773098260164261, 'timestamp': '2025-10-01 04:26:07.074010', 'step': 7191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:07.127258', 'step': 7191, 'epoch': 1} {'type': 'loss', 'content': 0.18761184811592102, 'timestamp': '2025-10-01 04:26:07.132863', 'step': 7192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:07.185328', 'step': 7192, 'epoch': 1} {'type': 'loss', 'content': 0.1975642442703247, 'timestamp': '2025-10-01 04:26:07.187659', 'step': 7193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:07.241201', 'step': 7193, 'epoch': 1} {'type': 'loss', 'content': 0.15808334946632385, 'timestamp': '2025-10-01 04:26:07.243776', 'step': 7194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:07.308276', 'step': 7194, 'epoch': 1} {'type': 'loss', 'content': 0.19077260792255402, 'timestamp': '2025-10-01 04:26:07.310438', 'step': 7195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:26:07.365258', 'step': 7195, 'epoch': 1} {'type': 'loss', 'content': 0.16984841227531433, 'timestamp': '2025-10-01 04:26:07.371494', 'step': 7196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:07.424642', 'step': 7196, 'epoch': 1} {'type': 'loss', 'content': 0.1966540664434433, 'timestamp': '2025-10-01 04:26:07.427381', 'step': 7197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:07.494461', 'step': 7197, 'epoch': 1} {'type': 'loss', 'content': 0.07271502912044525, 'timestamp': '2025-10-01 04:26:07.496724', 'step': 7198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:07.551976', 'step': 7198, 'epoch': 1} {'type': 'loss', 'content': 0.13780401647090912, 'timestamp': '2025-10-01 04:26:07.554234', 'step': 7199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:07.607301', 'step': 7199, 'epoch': 1} {'type': 'loss', 'content': 0.13856247067451477, 'timestamp': '2025-10-01 04:26:07.613084', 'step': 7200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:07.665595', 'step': 7200, 'epoch': 1} {'type': 'loss', 'content': 0.07070870697498322, 'timestamp': '2025-10-01 04:26:07.680170', 'step': 7201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:07.733476', 'step': 7201, 'epoch': 1} {'type': 'loss', 'content': 0.14666739106178284, 'timestamp': '2025-10-01 04:26:07.736595', 'step': 7202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:07.792629', 'step': 7202, 'epoch': 1} {'type': 'loss', 'content': 0.24366332590579987, 'timestamp': '2025-10-01 04:26:07.795603', 'step': 7203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:07.849004', 'step': 7203, 'epoch': 1} {'type': 'loss', 'content': 0.20013882219791412, 'timestamp': '2025-10-01 04:26:07.855646', 'step': 7204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:07.908779', 'step': 7204, 'epoch': 1} {'type': 'loss', 'content': 0.22324225306510925, 'timestamp': '2025-10-01 04:26:07.910920', 'step': 7205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:07.963561', 'step': 7205, 'epoch': 1} {'type': 'loss', 'content': 0.14340297877788544, 'timestamp': '2025-10-01 04:26:07.965786', 'step': 7206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:08.019394', 'step': 7206, 'epoch': 1} {'type': 'loss', 'content': 0.2197999358177185, 'timestamp': '2025-10-01 04:26:08.021602', 'step': 7207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:08.075200', 'step': 7207, 'epoch': 1} {'type': 'loss', 'content': 0.1695280522108078, 'timestamp': '2025-10-01 04:26:08.080916', 'step': 7208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:08.134225', 'step': 7208, 'epoch': 1} {'type': 'loss', 'content': 0.1672637015581131, 'timestamp': '2025-10-01 04:26:08.136360', 'step': 7209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:26:08.190386', 'step': 7209, 'epoch': 1} {'type': 'loss', 'content': 0.1449066698551178, 'timestamp': '2025-10-01 04:26:08.192351', 'step': 7210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:08.245901', 'step': 7210, 'epoch': 1} {'type': 'loss', 'content': 0.08582858741283417, 'timestamp': '2025-10-01 04:26:08.248497', 'step': 7211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:08.301779', 'step': 7211, 'epoch': 1} {'type': 'loss', 'content': 0.12496909499168396, 'timestamp': '2025-10-01 04:26:08.307619', 'step': 7212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:08.360871', 'step': 7212, 'epoch': 1} {'type': 'loss', 'content': 0.1519521027803421, 'timestamp': '2025-10-01 04:26:08.362959', 'step': 7213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:08.416082', 'step': 7213, 'epoch': 1} {'type': 'loss', 'content': 0.1798279881477356, 'timestamp': '2025-10-01 04:26:08.418262', 'step': 7214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:08.471899', 'step': 7214, 'epoch': 1} {'type': 'loss', 'content': 0.10656648874282837, 'timestamp': '2025-10-01 04:26:08.474124', 'step': 7215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:08.528988', 'step': 7215, 'epoch': 1} {'type': 'loss', 'content': 0.13186191022396088, 'timestamp': '2025-10-01 04:26:08.534791', 'step': 7216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:08.589495', 'step': 7216, 'epoch': 1} {'type': 'loss', 'content': 0.16943734884262085, 'timestamp': '2025-10-01 04:26:08.592362', 'step': 7217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:08.645998', 'step': 7217, 'epoch': 1} {'type': 'loss', 'content': 0.09496357291936874, 'timestamp': '2025-10-01 04:26:08.648005', 'step': 7218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:08.701701', 'step': 7218, 'epoch': 1} {'type': 'loss', 'content': 0.1422411948442459, 'timestamp': '2025-10-01 04:26:08.704003', 'step': 7219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:26:08.758122', 'step': 7219, 'epoch': 1} {'type': 'loss', 'content': 0.15661755204200745, 'timestamp': '2025-10-01 04:26:08.763893', 'step': 7220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:08.816701', 'step': 7220, 'epoch': 1} {'type': 'loss', 'content': 0.1567695289850235, 'timestamp': '2025-10-01 04:26:08.820080', 'step': 7221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:08.872854', 'step': 7221, 'epoch': 1} {'type': 'loss', 'content': 0.15047703683376312, 'timestamp': '2025-10-01 04:26:08.874984', 'step': 7222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:08.928299', 'step': 7222, 'epoch': 1} {'type': 'loss', 'content': 0.16229180991649628, 'timestamp': '2025-10-01 04:26:08.930811', 'step': 7223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:26:08.985154', 'step': 7223, 'epoch': 1} {'type': 'loss', 'content': 0.10880593955516815, 'timestamp': '2025-10-01 04:26:08.990958', 'step': 7224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:09.045342', 'step': 7224, 'epoch': 1} {'type': 'loss', 'content': 0.20344240963459015, 'timestamp': '2025-10-01 04:26:09.047877', 'step': 7225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:09.101193', 'step': 7225, 'epoch': 1} {'type': 'loss', 'content': 0.19318914413452148, 'timestamp': '2025-10-01 04:26:09.103455', 'step': 7226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:09.157054', 'step': 7226, 'epoch': 1} {'type': 'loss', 'content': 0.15744678676128387, 'timestamp': '2025-10-01 04:26:09.159283', 'step': 7227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:09.213404', 'step': 7227, 'epoch': 1} {'type': 'loss', 'content': 0.09977316111326218, 'timestamp': '2025-10-01 04:26:09.219260', 'step': 7228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:26:09.282352', 'step': 7228, 'epoch': 1} {'type': 'loss', 'content': 0.14557310938835144, 'timestamp': '2025-10-01 04:26:09.285028', 'step': 7229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:09.338484', 'step': 7229, 'epoch': 1} {'type': 'loss', 'content': 0.1305130571126938, 'timestamp': '2025-10-01 04:26:09.341238', 'step': 7230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:09.397119', 'step': 7230, 'epoch': 1} {'type': 'loss', 'content': 0.16645145416259766, 'timestamp': '2025-10-01 04:26:09.400315', 'step': 7231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:09.454164', 'step': 7231, 'epoch': 1} {'type': 'loss', 'content': 0.10563771426677704, 'timestamp': '2025-10-01 04:26:09.459991', 'step': 7232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:09.513833', 'step': 7232, 'epoch': 1} {'type': 'loss', 'content': 0.12009090930223465, 'timestamp': '2025-10-01 04:26:09.516501', 'step': 7233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:09.570106', 'step': 7233, 'epoch': 1} {'type': 'loss', 'content': 0.10868559777736664, 'timestamp': '2025-10-01 04:26:09.572331', 'step': 7234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:09.626136', 'step': 7234, 'epoch': 1} {'type': 'loss', 'content': 0.08626405149698257, 'timestamp': '2025-10-01 04:26:09.628469', 'step': 7235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:09.684063', 'step': 7235, 'epoch': 1} {'type': 'loss', 'content': 0.11705861240625381, 'timestamp': '2025-10-01 04:26:09.689697', 'step': 7236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:09.743877', 'step': 7236, 'epoch': 1} {'type': 'loss', 'content': 0.13134895265102386, 'timestamp': '2025-10-01 04:26:09.746497', 'step': 7237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:09.799589', 'step': 7237, 'epoch': 1} {'type': 'loss', 'content': 0.08747614175081253, 'timestamp': '2025-10-01 04:26:09.801788', 'step': 7238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:09.855003', 'step': 7238, 'epoch': 1} {'type': 'loss', 'content': 0.14619049429893494, 'timestamp': '2025-10-01 04:26:09.857263', 'step': 7239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:09.911207', 'step': 7239, 'epoch': 1} {'type': 'loss', 'content': 0.21900413930416107, 'timestamp': '2025-10-01 04:26:09.917261', 'step': 7240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:09.980565', 'step': 7240, 'epoch': 1} {'type': 'loss', 'content': 0.12862350046634674, 'timestamp': '2025-10-01 04:26:09.982635', 'step': 7241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:10.036598', 'step': 7241, 'epoch': 1} {'type': 'loss', 'content': 0.07601621001958847, 'timestamp': '2025-10-01 04:26:10.038882', 'step': 7242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:10.093010', 'step': 7242, 'epoch': 1} {'type': 'loss', 'content': 0.22250138223171234, 'timestamp': '2025-10-01 04:26:10.096434', 'step': 7243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:10.151122', 'step': 7243, 'epoch': 1} {'type': 'loss', 'content': 0.11325886100530624, 'timestamp': '2025-10-01 04:26:10.156733', 'step': 7244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:10.210446', 'step': 7244, 'epoch': 1} {'type': 'loss', 'content': 0.19426441192626953, 'timestamp': '2025-10-01 04:26:10.212521', 'step': 7245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:10.266000', 'step': 7245, 'epoch': 1} {'type': 'loss', 'content': 0.1653365194797516, 'timestamp': '2025-10-01 04:26:10.268143', 'step': 7246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:10.332600', 'step': 7246, 'epoch': 1} {'type': 'loss', 'content': 0.09688658267259598, 'timestamp': '2025-10-01 04:26:10.334880', 'step': 7247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:10.388648', 'step': 7247, 'epoch': 1} {'type': 'loss', 'content': 0.2280626744031906, 'timestamp': '2025-10-01 04:26:10.394926', 'step': 7248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:26:10.450413', 'step': 7248, 'epoch': 1} {'type': 'loss', 'content': 0.11705175042152405, 'timestamp': '2025-10-01 04:26:10.452480', 'step': 7249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:10.506863', 'step': 7249, 'epoch': 1} {'type': 'loss', 'content': 0.11507391929626465, 'timestamp': '2025-10-01 04:26:10.508836', 'step': 7250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:10.572018', 'step': 7250, 'epoch': 1} {'type': 'loss', 'content': 0.12124723941087723, 'timestamp': '2025-10-01 04:26:10.574210', 'step': 7251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:10.628933', 'step': 7251, 'epoch': 1} {'type': 'loss', 'content': 0.09528004378080368, 'timestamp': '2025-10-01 04:26:10.635010', 'step': 7252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:10.688196', 'step': 7252, 'epoch': 1} {'type': 'loss', 'content': 0.19026951491832733, 'timestamp': '2025-10-01 04:26:10.690891', 'step': 7253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:26:10.744699', 'step': 7253, 'epoch': 1} {'type': 'loss', 'content': 0.14789532124996185, 'timestamp': '2025-10-01 04:26:10.747136', 'step': 7254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:10.801207', 'step': 7254, 'epoch': 1} {'type': 'loss', 'content': 0.09808117151260376, 'timestamp': '2025-10-01 04:26:10.803929', 'step': 7255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:10.857566', 'step': 7255, 'epoch': 1} {'type': 'loss', 'content': 0.17892451584339142, 'timestamp': '2025-10-01 04:26:10.864057', 'step': 7256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:10.926975', 'step': 7256, 'epoch': 1} {'type': 'loss', 'content': 0.1092098280787468, 'timestamp': '2025-10-01 04:26:10.929104', 'step': 7257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:10.983189', 'step': 7257, 'epoch': 1} {'type': 'loss', 'content': 0.19840385019779205, 'timestamp': '2025-10-01 04:26:10.985469', 'step': 7258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:11.050197', 'step': 7258, 'epoch': 1} {'type': 'loss', 'content': 0.19148331880569458, 'timestamp': '2025-10-01 04:26:11.064364', 'step': 7259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:11.118111', 'step': 7259, 'epoch': 1} {'type': 'loss', 'content': 0.24206192791461945, 'timestamp': '2025-10-01 04:26:11.123955', 'step': 7260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:11.177341', 'step': 7260, 'epoch': 1} {'type': 'loss', 'content': 0.29257798194885254, 'timestamp': '2025-10-01 04:26:11.179962', 'step': 7261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:11.246556', 'step': 7261, 'epoch': 1} {'type': 'loss', 'content': 0.11702809482812881, 'timestamp': '2025-10-01 04:26:11.248788', 'step': 7262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:11.303006', 'step': 7262, 'epoch': 1} {'type': 'loss', 'content': 0.18101966381072998, 'timestamp': '2025-10-01 04:26:11.305382', 'step': 7263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:11.358885', 'step': 7263, 'epoch': 1} {'type': 'loss', 'content': 0.13565006852149963, 'timestamp': '2025-10-01 04:26:11.365125', 'step': 7264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:11.418701', 'step': 7264, 'epoch': 1} {'type': 'loss', 'content': 0.2273193597793579, 'timestamp': '2025-10-01 04:26:11.420789', 'step': 7265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:11.474242', 'step': 7265, 'epoch': 1} {'type': 'loss', 'content': 0.14729061722755432, 'timestamp': '2025-10-01 04:26:11.476342', 'step': 7266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:11.530130', 'step': 7266, 'epoch': 1} {'type': 'loss', 'content': 0.07337278127670288, 'timestamp': '2025-10-01 04:26:11.532249', 'step': 7267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:11.585600', 'step': 7267, 'epoch': 1} {'type': 'loss', 'content': 0.12780620157718658, 'timestamp': '2025-10-01 04:26:11.601796', 'step': 7268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:11.655204', 'step': 7268, 'epoch': 1} {'type': 'loss', 'content': 0.2473956048488617, 'timestamp': '2025-10-01 04:26:11.657971', 'step': 7269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:11.713185', 'step': 7269, 'epoch': 1} {'type': 'loss', 'content': 0.14436203241348267, 'timestamp': '2025-10-01 04:26:11.715523', 'step': 7270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:11.769568', 'step': 7270, 'epoch': 1} {'type': 'loss', 'content': 0.19187228381633759, 'timestamp': '2025-10-01 04:26:11.773467', 'step': 7271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:11.831334', 'step': 7271, 'epoch': 1} {'type': 'loss', 'content': 0.15812760591506958, 'timestamp': '2025-10-01 04:26:11.837372', 'step': 7272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:11.903877', 'step': 7272, 'epoch': 1} {'type': 'loss', 'content': 0.14886042475700378, 'timestamp': '2025-10-01 04:26:11.906275', 'step': 7273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:11.963457', 'step': 7273, 'epoch': 1} {'type': 'loss', 'content': 0.14074745774269104, 'timestamp': '2025-10-01 04:26:11.966922', 'step': 7274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:12.033167', 'step': 7274, 'epoch': 1} {'type': 'loss', 'content': 0.08928118646144867, 'timestamp': '2025-10-01 04:26:12.035454', 'step': 7275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:12.105311', 'step': 7275, 'epoch': 1} {'type': 'loss', 'content': 0.19199636578559875, 'timestamp': '2025-10-01 04:26:12.112025', 'step': 7276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:12.167315', 'step': 7276, 'epoch': 1} {'type': 'loss', 'content': 0.10760550945997238, 'timestamp': '2025-10-01 04:26:12.169482', 'step': 7277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:12.226439', 'step': 7277, 'epoch': 1} {'type': 'loss', 'content': 0.2130080908536911, 'timestamp': '2025-10-01 04:26:12.228967', 'step': 7278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:12.285278', 'step': 7278, 'epoch': 1} {'type': 'loss', 'content': 0.12011680752038956, 'timestamp': '2025-10-01 04:26:12.287427', 'step': 7279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:12.343758', 'step': 7279, 'epoch': 1} {'type': 'loss', 'content': 0.24735191464424133, 'timestamp': '2025-10-01 04:26:12.349663', 'step': 7280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:12.411676', 'step': 7280, 'epoch': 1} {'type': 'loss', 'content': 0.2785513401031494, 'timestamp': '2025-10-01 04:26:12.413831', 'step': 7281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:12.467401', 'step': 7281, 'epoch': 1} {'type': 'loss', 'content': 0.1184251680970192, 'timestamp': '2025-10-01 04:26:12.472162', 'step': 7282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:12.527323', 'step': 7282, 'epoch': 1} {'type': 'loss', 'content': 0.16572941839694977, 'timestamp': '2025-10-01 04:26:12.530043', 'step': 7283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:12.584860', 'step': 7283, 'epoch': 1} {'type': 'loss', 'content': 0.07320848107337952, 'timestamp': '2025-10-01 04:26:12.591614', 'step': 7284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:12.646674', 'step': 7284, 'epoch': 1} {'type': 'loss', 'content': 0.06253428757190704, 'timestamp': '2025-10-01 04:26:12.649192', 'step': 7285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:26:12.704781', 'step': 7285, 'epoch': 1} {'type': 'loss', 'content': 0.14034275710582733, 'timestamp': '2025-10-01 04:26:12.708454', 'step': 7286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:12.763294', 'step': 7286, 'epoch': 1} {'type': 'loss', 'content': 0.11643922328948975, 'timestamp': '2025-10-01 04:26:12.765767', 'step': 7287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:12.822066', 'step': 7287, 'epoch': 1} {'type': 'loss', 'content': 0.11830965429544449, 'timestamp': '2025-10-01 04:26:12.828244', 'step': 7288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:12.885258', 'step': 7288, 'epoch': 1} {'type': 'loss', 'content': 0.09676908701658249, 'timestamp': '2025-10-01 04:26:12.887772', 'step': 7289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:12.943814', 'step': 7289, 'epoch': 1} {'type': 'loss', 'content': 0.16687436401844025, 'timestamp': '2025-10-01 04:26:12.949266', 'step': 7290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:13.004177', 'step': 7290, 'epoch': 1} {'type': 'loss', 'content': 0.2124946564435959, 'timestamp': '2025-10-01 04:26:13.006364', 'step': 7291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:13.066359', 'step': 7291, 'epoch': 1} {'type': 'loss', 'content': 0.18529771268367767, 'timestamp': '2025-10-01 04:26:13.072656', 'step': 7292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:13.140649', 'step': 7292, 'epoch': 1} {'type': 'loss', 'content': 0.15968790650367737, 'timestamp': '2025-10-01 04:26:13.146331', 'step': 7293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:13.199959', 'step': 7293, 'epoch': 1} {'type': 'loss', 'content': 0.1080738753080368, 'timestamp': '2025-10-01 04:26:13.204136', 'step': 7294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:13.260847', 'step': 7294, 'epoch': 1} {'type': 'loss', 'content': 0.17525812983512878, 'timestamp': '2025-10-01 04:26:13.263552', 'step': 7295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:13.317778', 'step': 7295, 'epoch': 1} {'type': 'loss', 'content': 0.10165145248174667, 'timestamp': '2025-10-01 04:26:13.324205', 'step': 7296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:13.391959', 'step': 7296, 'epoch': 1} {'type': 'loss', 'content': 0.14583593606948853, 'timestamp': '2025-10-01 04:26:13.394587', 'step': 7297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:13.454006', 'step': 7297, 'epoch': 1} {'type': 'loss', 'content': 0.15970030426979065, 'timestamp': '2025-10-01 04:26:13.456724', 'step': 7298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:13.512553', 'step': 7298, 'epoch': 1} {'type': 'loss', 'content': 0.08771950751543045, 'timestamp': '2025-10-01 04:26:13.514996', 'step': 7299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:13.571033', 'step': 7299, 'epoch': 1} {'type': 'loss', 'content': 0.11240560561418533, 'timestamp': '2025-10-01 04:26:13.577698', 'step': 7300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:13.634798', 'step': 7300, 'epoch': 1} {'type': 'loss', 'content': 0.1627895087003708, 'timestamp': '2025-10-01 04:26:13.637018', 'step': 7301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:13.692459', 'step': 7301, 'epoch': 1} {'type': 'loss', 'content': 0.1671389639377594, 'timestamp': '2025-10-01 04:26:13.694585', 'step': 7302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:13.751474', 'step': 7302, 'epoch': 1} {'type': 'loss', 'content': 0.1632733792066574, 'timestamp': '2025-10-01 04:26:13.757118', 'step': 7303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:13.815742', 'step': 7303, 'epoch': 1} {'type': 'loss', 'content': 0.11504616588354111, 'timestamp': '2025-10-01 04:26:13.821662', 'step': 7304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:13.875223', 'step': 7304, 'epoch': 1} {'type': 'loss', 'content': 0.16646340489387512, 'timestamp': '2025-10-01 04:26:13.877374', 'step': 7305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:13.931038', 'step': 7305, 'epoch': 1} {'type': 'loss', 'content': 0.21197791397571564, 'timestamp': '2025-10-01 04:26:13.933147', 'step': 7306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:13.991117', 'step': 7306, 'epoch': 1} {'type': 'loss', 'content': 0.15308529138565063, 'timestamp': '2025-10-01 04:26:13.993117', 'step': 7307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:14.046269', 'step': 7307, 'epoch': 1} {'type': 'loss', 'content': 0.14881925284862518, 'timestamp': '2025-10-01 04:26:14.052279', 'step': 7308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:14.106668', 'step': 7308, 'epoch': 1} {'type': 'loss', 'content': 0.17973774671554565, 'timestamp': '2025-10-01 04:26:14.120295', 'step': 7309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:14.181624', 'step': 7309, 'epoch': 1} {'type': 'loss', 'content': 0.19689075648784637, 'timestamp': '2025-10-01 04:26:14.195905', 'step': 7310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:14.268558', 'step': 7310, 'epoch': 1} {'type': 'loss', 'content': 0.13445313274860382, 'timestamp': '2025-10-01 04:26:14.270830', 'step': 7311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:14.324888', 'step': 7311, 'epoch': 1} {'type': 'loss', 'content': 0.19598764181137085, 'timestamp': '2025-10-01 04:26:14.331028', 'step': 7312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:26:14.389579', 'step': 7312, 'epoch': 1} {'type': 'loss', 'content': 0.16887526214122772, 'timestamp': '2025-10-01 04:26:14.391915', 'step': 7313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:14.464624', 'step': 7313, 'epoch': 1} {'type': 'loss', 'content': 0.19064666330814362, 'timestamp': '2025-10-01 04:26:14.466560', 'step': 7314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:14.519928', 'step': 7314, 'epoch': 1} {'type': 'loss', 'content': 0.2182006984949112, 'timestamp': '2025-10-01 04:26:14.521985', 'step': 7315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:14.575616', 'step': 7315, 'epoch': 1} {'type': 'loss', 'content': 0.1183968260884285, 'timestamp': '2025-10-01 04:26:14.586359', 'step': 7316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:14.639387', 'step': 7316, 'epoch': 1} {'type': 'loss', 'content': 0.19168947637081146, 'timestamp': '2025-10-01 04:26:14.641189', 'step': 7317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:14.694689', 'step': 7317, 'epoch': 1} {'type': 'loss', 'content': 0.21301977336406708, 'timestamp': '2025-10-01 04:26:14.696955', 'step': 7318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:14.750297', 'step': 7318, 'epoch': 1} {'type': 'loss', 'content': 0.11664263159036636, 'timestamp': '2025-10-01 04:26:14.752668', 'step': 7319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:14.805629', 'step': 7319, 'epoch': 1} {'type': 'loss', 'content': 0.16890105605125427, 'timestamp': '2025-10-01 04:26:14.811490', 'step': 7320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:14.866889', 'step': 7320, 'epoch': 1} {'type': 'loss', 'content': 0.2551591098308563, 'timestamp': '2025-10-01 04:26:14.869484', 'step': 7321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:14.926914', 'step': 7321, 'epoch': 1} {'type': 'loss', 'content': 0.18997035920619965, 'timestamp': '2025-10-01 04:26:14.929055', 'step': 7322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:14.985486', 'step': 7322, 'epoch': 1} {'type': 'loss', 'content': 0.15335187315940857, 'timestamp': '2025-10-01 04:26:14.988201', 'step': 7323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:15.044629', 'step': 7323, 'epoch': 1} {'type': 'loss', 'content': 0.1128477081656456, 'timestamp': '2025-10-01 04:26:15.051251', 'step': 7324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:15.116430', 'step': 7324, 'epoch': 1} {'type': 'loss', 'content': 0.11950557678937912, 'timestamp': '2025-10-01 04:26:15.118873', 'step': 7325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:15.177909', 'step': 7325, 'epoch': 1} {'type': 'loss', 'content': 0.18557871878147125, 'timestamp': '2025-10-01 04:26:15.180073', 'step': 7326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:15.236318', 'step': 7326, 'epoch': 1} {'type': 'loss', 'content': 0.11904561519622803, 'timestamp': '2025-10-01 04:26:15.238734', 'step': 7327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:15.293906', 'step': 7327, 'epoch': 1} {'type': 'loss', 'content': 0.14361795783042908, 'timestamp': '2025-10-01 04:26:15.300548', 'step': 7328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:15.355831', 'step': 7328, 'epoch': 1} {'type': 'loss', 'content': 0.12259175628423691, 'timestamp': '2025-10-01 04:26:15.357785', 'step': 7329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:15.412903', 'step': 7329, 'epoch': 1} {'type': 'loss', 'content': 0.20320066809654236, 'timestamp': '2025-10-01 04:26:15.416710', 'step': 7330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:15.470826', 'step': 7330, 'epoch': 1} {'type': 'loss', 'content': 0.1214185580611229, 'timestamp': '2025-10-01 04:26:15.474183', 'step': 7331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:15.527663', 'step': 7331, 'epoch': 1} {'type': 'loss', 'content': 0.1730661541223526, 'timestamp': '2025-10-01 04:26:15.535652', 'step': 7332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:15.588766', 'step': 7332, 'epoch': 1} {'type': 'loss', 'content': 0.129582017660141, 'timestamp': '2025-10-01 04:26:15.591046', 'step': 7333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:15.652526', 'step': 7333, 'epoch': 1} {'type': 'loss', 'content': 0.09904445707798004, 'timestamp': '2025-10-01 04:26:15.654522', 'step': 7334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:15.707496', 'step': 7334, 'epoch': 1} {'type': 'loss', 'content': 0.10050909966230392, 'timestamp': '2025-10-01 04:26:15.710012', 'step': 7335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:15.763162', 'step': 7335, 'epoch': 1} {'type': 'loss', 'content': 0.1878202259540558, 'timestamp': '2025-10-01 04:26:15.769069', 'step': 7336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:15.836135', 'step': 7336, 'epoch': 1} {'type': 'loss', 'content': 0.22111396491527557, 'timestamp': '2025-10-01 04:26:15.838416', 'step': 7337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:15.891013', 'step': 7337, 'epoch': 1} {'type': 'loss', 'content': 0.11742496490478516, 'timestamp': '2025-10-01 04:26:15.893094', 'step': 7338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:26:15.947155', 'step': 7338, 'epoch': 1} {'type': 'loss', 'content': 0.20699435472488403, 'timestamp': '2025-10-01 04:26:15.958781', 'step': 7339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:16.012817', 'step': 7339, 'epoch': 1} {'type': 'loss', 'content': 0.1773173213005066, 'timestamp': '2025-10-01 04:26:16.018442', 'step': 7340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:16.074013', 'step': 7340, 'epoch': 1} {'type': 'loss', 'content': 0.20958097279071808, 'timestamp': '2025-10-01 04:26:16.076046', 'step': 7341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:16.131045', 'step': 7341, 'epoch': 1} {'type': 'loss', 'content': 0.16635477542877197, 'timestamp': '2025-10-01 04:26:16.133183', 'step': 7342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:16.195269', 'step': 7342, 'epoch': 1} {'type': 'loss', 'content': 0.12284079194068909, 'timestamp': '2025-10-01 04:26:16.197373', 'step': 7343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:16.251190', 'step': 7343, 'epoch': 1} {'type': 'loss', 'content': 0.2847702205181122, 'timestamp': '2025-10-01 04:26:16.257241', 'step': 7344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:16.311676', 'step': 7344, 'epoch': 1} {'type': 'loss', 'content': 0.14230448007583618, 'timestamp': '2025-10-01 04:26:16.313853', 'step': 7345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:16.366524', 'step': 7345, 'epoch': 1} {'type': 'loss', 'content': 0.15179024636745453, 'timestamp': '2025-10-01 04:26:16.369078', 'step': 7346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:16.423433', 'step': 7346, 'epoch': 1} {'type': 'loss', 'content': 0.13994714617729187, 'timestamp': '2025-10-01 04:26:16.433406', 'step': 7347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:16.486549', 'step': 7347, 'epoch': 1} {'type': 'loss', 'content': 0.14654961228370667, 'timestamp': '2025-10-01 04:26:16.492538', 'step': 7348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:16.545394', 'step': 7348, 'epoch': 1} {'type': 'loss', 'content': 0.06978738307952881, 'timestamp': '2025-10-01 04:26:16.547646', 'step': 7349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:16.601123', 'step': 7349, 'epoch': 1} {'type': 'loss', 'content': 0.11280522495508194, 'timestamp': '2025-10-01 04:26:16.603313', 'step': 7350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:16.656896', 'step': 7350, 'epoch': 1} {'type': 'loss', 'content': 0.12301018834114075, 'timestamp': '2025-10-01 04:26:16.668921', 'step': 7351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:16.736987', 'step': 7351, 'epoch': 1} {'type': 'loss', 'content': 0.16622835397720337, 'timestamp': '2025-10-01 04:26:16.743320', 'step': 7352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:16.796432', 'step': 7352, 'epoch': 1} {'type': 'loss', 'content': 0.16362571716308594, 'timestamp': '2025-10-01 04:26:16.798527', 'step': 7353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:16.851791', 'step': 7353, 'epoch': 1} {'type': 'loss', 'content': 0.15198186039924622, 'timestamp': '2025-10-01 04:26:16.853913', 'step': 7354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:16.914768', 'step': 7354, 'epoch': 1} {'type': 'loss', 'content': 0.21728219091892242, 'timestamp': '2025-10-01 04:26:16.916876', 'step': 7355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:26:16.997915', 'step': 7355, 'epoch': 1} {'type': 'loss', 'content': 0.10681739449501038, 'timestamp': '2025-10-01 04:26:17.004230', 'step': 7356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:17.058276', 'step': 7356, 'epoch': 1} {'type': 'loss', 'content': 0.1928410530090332, 'timestamp': '2025-10-01 04:26:17.060697', 'step': 7357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:17.115145', 'step': 7357, 'epoch': 1} {'type': 'loss', 'content': 0.10142093151807785, 'timestamp': '2025-10-01 04:26:17.117239', 'step': 7358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:17.187477', 'step': 7358, 'epoch': 1} {'type': 'loss', 'content': 0.15704986453056335, 'timestamp': '2025-10-01 04:26:17.190557', 'step': 7359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:17.256883', 'step': 7359, 'epoch': 1} {'type': 'loss', 'content': 0.13512487709522247, 'timestamp': '2025-10-01 04:26:17.263537', 'step': 7360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:17.317760', 'step': 7360, 'epoch': 1} {'type': 'loss', 'content': 0.1987832486629486, 'timestamp': '2025-10-01 04:26:17.319968', 'step': 7361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:17.374338', 'step': 7361, 'epoch': 1} {'type': 'loss', 'content': 0.10412397980690002, 'timestamp': '2025-10-01 04:26:17.376630', 'step': 7362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:17.431412', 'step': 7362, 'epoch': 1} {'type': 'loss', 'content': 0.08803518861532211, 'timestamp': '2025-10-01 04:26:17.437033', 'step': 7363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:17.493810', 'step': 7363, 'epoch': 1} {'type': 'loss', 'content': 0.2543330192565918, 'timestamp': '2025-10-01 04:26:17.500262', 'step': 7364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:17.554732', 'step': 7364, 'epoch': 1} {'type': 'loss', 'content': 0.21662883460521698, 'timestamp': '2025-10-01 04:26:17.556945', 'step': 7365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:17.612213', 'step': 7365, 'epoch': 1} {'type': 'loss', 'content': 0.09901893883943558, 'timestamp': '2025-10-01 04:26:17.614524', 'step': 7366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:17.669393', 'step': 7366, 'epoch': 1} {'type': 'loss', 'content': 0.11498570442199707, 'timestamp': '2025-10-01 04:26:17.671602', 'step': 7367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:17.726499', 'step': 7367, 'epoch': 1} {'type': 'loss', 'content': 0.20612387359142303, 'timestamp': '2025-10-01 04:26:17.732558', 'step': 7368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:17.802918', 'step': 7368, 'epoch': 1} {'type': 'loss', 'content': 0.14649000763893127, 'timestamp': '2025-10-01 04:26:17.805048', 'step': 7369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:17.858898', 'step': 7369, 'epoch': 1} {'type': 'loss', 'content': 0.13970507681369781, 'timestamp': '2025-10-01 04:26:17.861210', 'step': 7370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:26:17.915733', 'step': 7370, 'epoch': 1} {'type': 'loss', 'content': 0.13059349358081818, 'timestamp': '2025-10-01 04:26:17.918166', 'step': 7371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:17.983063', 'step': 7371, 'epoch': 1} {'type': 'loss', 'content': 0.14887215197086334, 'timestamp': '2025-10-01 04:26:17.989777', 'step': 7372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:18.045931', 'step': 7372, 'epoch': 1} {'type': 'loss', 'content': 0.17667151987552643, 'timestamp': '2025-10-01 04:26:18.048129', 'step': 7373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:18.108731', 'step': 7373, 'epoch': 1} {'type': 'loss', 'content': 0.20021744072437286, 'timestamp': '2025-10-01 04:26:18.111027', 'step': 7374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:18.169333', 'step': 7374, 'epoch': 1} {'type': 'loss', 'content': 0.15182261168956757, 'timestamp': '2025-10-01 04:26:18.171588', 'step': 7375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:18.230691', 'step': 7375, 'epoch': 1} {'type': 'loss', 'content': 0.2106853723526001, 'timestamp': '2025-10-01 04:26:18.237584', 'step': 7376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:18.293479', 'step': 7376, 'epoch': 1} {'type': 'loss', 'content': 0.18669958412647247, 'timestamp': '2025-10-01 04:26:18.295663', 'step': 7377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:18.352975', 'step': 7377, 'epoch': 1} {'type': 'loss', 'content': 0.15359006822109222, 'timestamp': '2025-10-01 04:26:18.355260', 'step': 7378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:18.411279', 'step': 7378, 'epoch': 1} {'type': 'loss', 'content': 0.1427377313375473, 'timestamp': '2025-10-01 04:26:18.413590', 'step': 7379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:18.471723', 'step': 7379, 'epoch': 1} {'type': 'loss', 'content': 0.15512171387672424, 'timestamp': '2025-10-01 04:26:18.478602', 'step': 7380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:18.556186', 'step': 7380, 'epoch': 1} {'type': 'loss', 'content': 0.1972097009420395, 'timestamp': '2025-10-01 04:26:18.558241', 'step': 7381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:18.613470', 'step': 7381, 'epoch': 1} {'type': 'loss', 'content': 0.19943509995937347, 'timestamp': '2025-10-01 04:26:18.615715', 'step': 7382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:18.671024', 'step': 7382, 'epoch': 1} {'type': 'loss', 'content': 0.1685384064912796, 'timestamp': '2025-10-01 04:26:18.673225', 'step': 7383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:18.730291', 'step': 7383, 'epoch': 1} {'type': 'loss', 'content': 0.1426488161087036, 'timestamp': '2025-10-01 04:26:18.737418', 'step': 7384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:18.797410', 'step': 7384, 'epoch': 1} {'type': 'loss', 'content': 0.2304188311100006, 'timestamp': '2025-10-01 04:26:18.799843', 'step': 7385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:18.861087', 'step': 7385, 'epoch': 1} {'type': 'loss', 'content': 0.15209798514842987, 'timestamp': '2025-10-01 04:26:18.863544', 'step': 7386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:18.922476', 'step': 7386, 'epoch': 1} {'type': 'loss', 'content': 0.0816059336066246, 'timestamp': '2025-10-01 04:26:18.924670', 'step': 7387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:18.979994', 'step': 7387, 'epoch': 1} {'type': 'loss', 'content': 0.11440510302782059, 'timestamp': '2025-10-01 04:26:18.986356', 'step': 7388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:19.040699', 'step': 7388, 'epoch': 1} {'type': 'loss', 'content': 0.23882462084293365, 'timestamp': '2025-10-01 04:26:19.051537', 'step': 7389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:19.119462', 'step': 7389, 'epoch': 1} {'type': 'loss', 'content': 0.14393122494220734, 'timestamp': '2025-10-01 04:26:19.121603', 'step': 7390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:19.175840', 'step': 7390, 'epoch': 1} {'type': 'loss', 'content': 0.2396049201488495, 'timestamp': '2025-10-01 04:26:19.178196', 'step': 7391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:19.232069', 'step': 7391, 'epoch': 1} {'type': 'loss', 'content': 0.15245094895362854, 'timestamp': '2025-10-01 04:26:19.238158', 'step': 7392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:19.291214', 'step': 7392, 'epoch': 1} {'type': 'loss', 'content': 0.18816208839416504, 'timestamp': '2025-10-01 04:26:19.293333', 'step': 7393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:19.347098', 'step': 7393, 'epoch': 1} {'type': 'loss', 'content': 0.18188261985778809, 'timestamp': '2025-10-01 04:26:19.349322', 'step': 7394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:19.415752', 'step': 7394, 'epoch': 1} {'type': 'loss', 'content': 0.14246803522109985, 'timestamp': '2025-10-01 04:26:19.418109', 'step': 7395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:19.472093', 'step': 7395, 'epoch': 1} {'type': 'loss', 'content': 0.14632365107536316, 'timestamp': '2025-10-01 04:26:19.478081', 'step': 7396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:26:19.535651', 'step': 7396, 'epoch': 1} {'type': 'loss', 'content': 0.12844952940940857, 'timestamp': '2025-10-01 04:26:19.538116', 'step': 7397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:19.591717', 'step': 7397, 'epoch': 1} {'type': 'loss', 'content': 0.13045385479927063, 'timestamp': '2025-10-01 04:26:19.594268', 'step': 7398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:19.648243', 'step': 7398, 'epoch': 1} {'type': 'loss', 'content': 0.13327929377555847, 'timestamp': '2025-10-01 04:26:19.651157', 'step': 7399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:19.712505', 'step': 7399, 'epoch': 1} {'type': 'loss', 'content': 0.13497592508792877, 'timestamp': '2025-10-01 04:26:19.726422', 'step': 7400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:19.789580', 'step': 7400, 'epoch': 1} {'type': 'loss', 'content': 0.25240737199783325, 'timestamp': '2025-10-01 04:26:19.791775', 'step': 7401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:19.845502', 'step': 7401, 'epoch': 1} {'type': 'loss', 'content': 0.16609753668308258, 'timestamp': '2025-10-01 04:26:19.847669', 'step': 7402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:19.901502', 'step': 7402, 'epoch': 1} {'type': 'loss', 'content': 0.1589798778295517, 'timestamp': '2025-10-01 04:26:19.903669', 'step': 7403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:19.957395', 'step': 7403, 'epoch': 1} {'type': 'loss', 'content': 0.18946796655654907, 'timestamp': '2025-10-01 04:26:19.965449', 'step': 7404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:20.017825', 'step': 7404, 'epoch': 1} {'type': 'loss', 'content': 0.1173882856965065, 'timestamp': '2025-10-01 04:26:20.021246', 'step': 7405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:20.074932', 'step': 7405, 'epoch': 1} {'type': 'loss', 'content': 0.15184596180915833, 'timestamp': '2025-10-01 04:26:20.078722', 'step': 7406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:20.137061', 'step': 7406, 'epoch': 1} {'type': 'loss', 'content': 0.19886831939220428, 'timestamp': '2025-10-01 04:26:20.139694', 'step': 7407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:20.200987', 'step': 7407, 'epoch': 1} {'type': 'loss', 'content': 0.32599538564682007, 'timestamp': '2025-10-01 04:26:20.206720', 'step': 7408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:20.260349', 'step': 7408, 'epoch': 1} {'type': 'loss', 'content': 0.22236798703670502, 'timestamp': '2025-10-01 04:26:20.262446', 'step': 7409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:20.315333', 'step': 7409, 'epoch': 1} {'type': 'loss', 'content': 0.1300145536661148, 'timestamp': '2025-10-01 04:26:20.317543', 'step': 7410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:20.370886', 'step': 7410, 'epoch': 1} {'type': 'loss', 'content': 0.09034392237663269, 'timestamp': '2025-10-01 04:26:20.373103', 'step': 7411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:20.426428', 'step': 7411, 'epoch': 1} {'type': 'loss', 'content': 0.11787757277488708, 'timestamp': '2025-10-01 04:26:20.432260', 'step': 7412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:26:20.486676', 'step': 7412, 'epoch': 1} {'type': 'loss', 'content': 0.20824889838695526, 'timestamp': '2025-10-01 04:26:20.492491', 'step': 7413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:20.553026', 'step': 7413, 'epoch': 1} {'type': 'loss', 'content': 0.0823383629322052, 'timestamp': '2025-10-01 04:26:20.555123', 'step': 7414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:20.609031', 'step': 7414, 'epoch': 1} {'type': 'loss', 'content': 0.06291670352220535, 'timestamp': '2025-10-01 04:26:20.611452', 'step': 7415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:20.665534', 'step': 7415, 'epoch': 1} {'type': 'loss', 'content': 0.1201653927564621, 'timestamp': '2025-10-01 04:26:20.671292', 'step': 7416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:26:20.723868', 'step': 7416, 'epoch': 1} {'type': 'loss', 'content': 0.25830909609794617, 'timestamp': '2025-10-01 04:26:20.726122', 'step': 7417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:20.784131', 'step': 7417, 'epoch': 1} {'type': 'loss', 'content': 0.06508050113916397, 'timestamp': '2025-10-01 04:26:20.788030', 'step': 7418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:20.843843', 'step': 7418, 'epoch': 1} {'type': 'loss', 'content': 0.17256991565227509, 'timestamp': '2025-10-01 04:26:20.849192', 'step': 7419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:20.908461', 'step': 7419, 'epoch': 1} {'type': 'loss', 'content': 0.16798612475395203, 'timestamp': '2025-10-01 04:26:20.915031', 'step': 7420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:20.969185', 'step': 7420, 'epoch': 1} {'type': 'loss', 'content': 0.08419875055551529, 'timestamp': '2025-10-01 04:26:20.974666', 'step': 7421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:21.040417', 'step': 7421, 'epoch': 1} {'type': 'loss', 'content': 0.15461672842502594, 'timestamp': '2025-10-01 04:26:21.042529', 'step': 7422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:21.095670', 'step': 7422, 'epoch': 1} {'type': 'loss', 'content': 0.16305077075958252, 'timestamp': '2025-10-01 04:26:21.100268', 'step': 7423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:21.163999', 'step': 7423, 'epoch': 1} {'type': 'loss', 'content': 0.10722574591636658, 'timestamp': '2025-10-01 04:26:21.169978', 'step': 7424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:21.234876', 'step': 7424, 'epoch': 1} {'type': 'loss', 'content': 0.26332521438598633, 'timestamp': '2025-10-01 04:26:21.237795', 'step': 7425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:21.291112', 'step': 7425, 'epoch': 1} {'type': 'loss', 'content': 0.13883419334888458, 'timestamp': '2025-10-01 04:26:21.294642', 'step': 7426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:21.350688', 'step': 7426, 'epoch': 1} {'type': 'loss', 'content': 0.11726260185241699, 'timestamp': '2025-10-01 04:26:21.354308', 'step': 7427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:21.419423', 'step': 7427, 'epoch': 1} {'type': 'loss', 'content': 0.08766201883554459, 'timestamp': '2025-10-01 04:26:21.425732', 'step': 7428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:26:21.484393', 'step': 7428, 'epoch': 1} {'type': 'loss', 'content': 0.17657506465911865, 'timestamp': '2025-10-01 04:26:21.486811', 'step': 7429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:21.540595', 'step': 7429, 'epoch': 1} {'type': 'loss', 'content': 0.12049055099487305, 'timestamp': '2025-10-01 04:26:21.542835', 'step': 7430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:21.596646', 'step': 7430, 'epoch': 1} {'type': 'loss', 'content': 0.19750316441059113, 'timestamp': '2025-10-01 04:26:21.599091', 'step': 7431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:21.661132', 'step': 7431, 'epoch': 1} {'type': 'loss', 'content': 0.18096394836902618, 'timestamp': '2025-10-01 04:26:21.667795', 'step': 7432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:21.721811', 'step': 7432, 'epoch': 1} {'type': 'loss', 'content': 0.15348589420318604, 'timestamp': '2025-10-01 04:26:21.724002', 'step': 7433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:21.778766', 'step': 7433, 'epoch': 1} {'type': 'loss', 'content': 0.1563330888748169, 'timestamp': '2025-10-01 04:26:21.781551', 'step': 7434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:21.835792', 'step': 7434, 'epoch': 1} {'type': 'loss', 'content': 0.1436365246772766, 'timestamp': '2025-10-01 04:26:21.847007', 'step': 7435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:21.900692', 'step': 7435, 'epoch': 1} {'type': 'loss', 'content': 0.1251683384180069, 'timestamp': '2025-10-01 04:26:21.907030', 'step': 7436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:21.960649', 'step': 7436, 'epoch': 1} {'type': 'loss', 'content': 0.1668325513601303, 'timestamp': '2025-10-01 04:26:21.962962', 'step': 7437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:22.020923', 'step': 7437, 'epoch': 1} {'type': 'loss', 'content': 0.06328944861888885, 'timestamp': '2025-10-01 04:26:22.028190', 'step': 7438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:22.083485', 'step': 7438, 'epoch': 1} {'type': 'loss', 'content': 0.15831314027309418, 'timestamp': '2025-10-01 04:26:22.085873', 'step': 7439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:22.143302', 'step': 7439, 'epoch': 1} {'type': 'loss', 'content': 0.18696115911006927, 'timestamp': '2025-10-01 04:26:22.149582', 'step': 7440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:22.204271', 'step': 7440, 'epoch': 1} {'type': 'loss', 'content': 0.14841444790363312, 'timestamp': '2025-10-01 04:26:22.206893', 'step': 7441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:22.269074', 'step': 7441, 'epoch': 1} {'type': 'loss', 'content': 0.14064162969589233, 'timestamp': '2025-10-01 04:26:22.271715', 'step': 7442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:26:22.329039', 'step': 7442, 'epoch': 1} {'type': 'loss', 'content': 0.24856628477573395, 'timestamp': '2025-10-01 04:26:22.331495', 'step': 7443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:22.385882', 'step': 7443, 'epoch': 1} {'type': 'loss', 'content': 0.07514560967683792, 'timestamp': '2025-10-01 04:26:22.392371', 'step': 7444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:22.446916', 'step': 7444, 'epoch': 1} {'type': 'loss', 'content': 0.16892065107822418, 'timestamp': '2025-10-01 04:26:22.449505', 'step': 7445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:22.504757', 'step': 7445, 'epoch': 1} {'type': 'loss', 'content': 0.1403994858264923, 'timestamp': '2025-10-01 04:26:22.522219', 'step': 7446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:22.576824', 'step': 7446, 'epoch': 1} {'type': 'loss', 'content': 0.2243858277797699, 'timestamp': '2025-10-01 04:26:22.579534', 'step': 7447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:22.633594', 'step': 7447, 'epoch': 1} {'type': 'loss', 'content': 0.16820910573005676, 'timestamp': '2025-10-01 04:26:22.639503', 'step': 7448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:26:22.693815', 'step': 7448, 'epoch': 1} {'type': 'loss', 'content': 0.12386717647314072, 'timestamp': '2025-10-01 04:26:22.696132', 'step': 7449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:22.750494', 'step': 7449, 'epoch': 1} {'type': 'loss', 'content': 0.1412162482738495, 'timestamp': '2025-10-01 04:26:22.752700', 'step': 7450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:22.808128', 'step': 7450, 'epoch': 1} {'type': 'loss', 'content': 0.18320699036121368, 'timestamp': '2025-10-01 04:26:22.810822', 'step': 7451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:22.865813', 'step': 7451, 'epoch': 1} {'type': 'loss', 'content': 0.12757082283496857, 'timestamp': '2025-10-01 04:26:22.872131', 'step': 7452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:22.926355', 'step': 7452, 'epoch': 1} {'type': 'loss', 'content': 0.10397715121507645, 'timestamp': '2025-10-01 04:26:22.928820', 'step': 7453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:22.985630', 'step': 7453, 'epoch': 1} {'type': 'loss', 'content': 0.06676319241523743, 'timestamp': '2025-10-01 04:26:22.988459', 'step': 7454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:26:23.044324', 'step': 7454, 'epoch': 1} {'type': 'loss', 'content': 0.3817055821418762, 'timestamp': '2025-10-01 04:26:23.046794', 'step': 7455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:23.101376', 'step': 7455, 'epoch': 1} {'type': 'loss', 'content': 0.1483612209558487, 'timestamp': '2025-10-01 04:26:23.107529', 'step': 7456, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-01 04:26:36.788206', 'step': 7456, 'epoch': 1} {'type': 'pplx', 'content': 11064.457878951378, 'timestamp': '2025-10-01 04:26:36.791011', 'step': 7456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:36.844663', 'step': 7456, 'epoch': 1} {'type': 'loss', 'content': 0.149276465177536, 'timestamp': '2025-10-01 04:26:36.847819', 'step': 7457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:36.901451', 'step': 7457, 'epoch': 1} {'type': 'loss', 'content': 0.12336145341396332, 'timestamp': '2025-10-01 04:26:36.903649', 'step': 7458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:36.956812', 'step': 7458, 'epoch': 1} {'type': 'loss', 'content': 0.08437984436750412, 'timestamp': '2025-10-01 04:26:36.958948', 'step': 7459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [1, 208], 'flops': 1040006410960.0}, 'timestamp': '2025-10-01 04:26:37.039436', 'step': 7459, 'epoch': 1} {'type': 'loss', 'content': 0.36592674255371094, 'timestamp': '2025-10-01 04:26:37.045520', 'step': 7460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:37.098654', 'step': 7460, 'epoch': 2} {'type': 'loss', 'content': 0.07252538204193115, 'timestamp': '2025-10-01 04:26:37.100576', 'step': 7461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:37.153557', 'step': 7461, 'epoch': 2} {'type': 'loss', 'content': 0.11236516386270523, 'timestamp': '2025-10-01 04:26:37.155731', 'step': 7462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:37.208947', 'step': 7462, 'epoch': 2} {'type': 'loss', 'content': 0.14757640659809113, 'timestamp': '2025-10-01 04:26:37.211324', 'step': 7463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:37.264564', 'step': 7463, 'epoch': 2} {'type': 'loss', 'content': 0.12927144765853882, 'timestamp': '2025-10-01 04:26:37.270265', 'step': 7464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:37.322564', 'step': 7464, 'epoch': 2} {'type': 'loss', 'content': 0.20355674624443054, 'timestamp': '2025-10-01 04:26:37.324604', 'step': 7465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:37.378469', 'step': 7465, 'epoch': 2} {'type': 'loss', 'content': 0.09308921545743942, 'timestamp': '2025-10-01 04:26:37.380777', 'step': 7466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:37.439542', 'step': 7466, 'epoch': 2} {'type': 'loss', 'content': 0.1599310338497162, 'timestamp': '2025-10-01 04:26:37.441761', 'step': 7467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:37.494407', 'step': 7467, 'epoch': 2} {'type': 'loss', 'content': 0.09445016831159592, 'timestamp': '2025-10-01 04:26:37.500146', 'step': 7468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:37.561128', 'step': 7468, 'epoch': 2} {'type': 'loss', 'content': 0.15135136246681213, 'timestamp': '2025-10-01 04:26:37.563296', 'step': 7469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:37.617529', 'step': 7469, 'epoch': 2} {'type': 'loss', 'content': 0.14697520434856415, 'timestamp': '2025-10-01 04:26:37.619571', 'step': 7470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:37.672690', 'step': 7470, 'epoch': 2} {'type': 'loss', 'content': 0.11579432338476181, 'timestamp': '2025-10-01 04:26:37.674888', 'step': 7471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:37.727710', 'step': 7471, 'epoch': 2} {'type': 'loss', 'content': 0.15385842323303223, 'timestamp': '2025-10-01 04:26:37.734239', 'step': 7472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:37.789614', 'step': 7472, 'epoch': 2} {'type': 'loss', 'content': 0.11971485614776611, 'timestamp': '2025-10-01 04:26:37.791693', 'step': 7473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:37.844639', 'step': 7473, 'epoch': 2} {'type': 'loss', 'content': 0.1098160445690155, 'timestamp': '2025-10-01 04:26:37.846796', 'step': 7474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:37.900198', 'step': 7474, 'epoch': 2} {'type': 'loss', 'content': 0.12246385216712952, 'timestamp': '2025-10-01 04:26:37.902204', 'step': 7475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:37.955021', 'step': 7475, 'epoch': 2} {'type': 'loss', 'content': 0.2053043246269226, 'timestamp': '2025-10-01 04:26:37.960750', 'step': 7476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:38.013200', 'step': 7476, 'epoch': 2} {'type': 'loss', 'content': 0.09769229590892792, 'timestamp': '2025-10-01 04:26:38.015521', 'step': 7477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:38.068927', 'step': 7477, 'epoch': 2} {'type': 'loss', 'content': 0.17630991339683533, 'timestamp': '2025-10-01 04:26:38.070896', 'step': 7478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:38.125283', 'step': 7478, 'epoch': 2} {'type': 'loss', 'content': 0.23966531455516815, 'timestamp': '2025-10-01 04:26:38.127421', 'step': 7479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:38.180390', 'step': 7479, 'epoch': 2} {'type': 'loss', 'content': 0.09445396065711975, 'timestamp': '2025-10-01 04:26:38.186183', 'step': 7480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:38.239402', 'step': 7480, 'epoch': 2} {'type': 'loss', 'content': 0.08336129784584045, 'timestamp': '2025-10-01 04:26:38.241617', 'step': 7481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:38.294370', 'step': 7481, 'epoch': 2} {'type': 'loss', 'content': 0.15093179047107697, 'timestamp': '2025-10-01 04:26:38.297119', 'step': 7482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:38.349764', 'step': 7482, 'epoch': 2} {'type': 'loss', 'content': 0.0730629414319992, 'timestamp': '2025-10-01 04:26:38.351905', 'step': 7483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:38.405652', 'step': 7483, 'epoch': 2} {'type': 'loss', 'content': 0.16482672095298767, 'timestamp': '2025-10-01 04:26:38.411326', 'step': 7484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:38.464779', 'step': 7484, 'epoch': 2} {'type': 'loss', 'content': 0.07329334318637848, 'timestamp': '2025-10-01 04:26:38.466920', 'step': 7485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:26:38.520722', 'step': 7485, 'epoch': 2} {'type': 'loss', 'content': 0.15065009891986847, 'timestamp': '2025-10-01 04:26:38.523061', 'step': 7486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:38.576688', 'step': 7486, 'epoch': 2} {'type': 'loss', 'content': 0.23218493163585663, 'timestamp': '2025-10-01 04:26:38.579043', 'step': 7487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:38.632352', 'step': 7487, 'epoch': 2} {'type': 'loss', 'content': 0.13589416444301605, 'timestamp': '2025-10-01 04:26:38.638496', 'step': 7488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:38.692109', 'step': 7488, 'epoch': 2} {'type': 'loss', 'content': 0.12637251615524292, 'timestamp': '2025-10-01 04:26:38.694694', 'step': 7489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:38.749838', 'step': 7489, 'epoch': 2} {'type': 'loss', 'content': 0.07980687916278839, 'timestamp': '2025-10-01 04:26:38.751968', 'step': 7490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:38.806065', 'step': 7490, 'epoch': 2} {'type': 'loss', 'content': 0.18389169871807098, 'timestamp': '2025-10-01 04:26:38.808516', 'step': 7491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:38.862530', 'step': 7491, 'epoch': 2} {'type': 'loss', 'content': 0.08694520592689514, 'timestamp': '2025-10-01 04:26:38.868536', 'step': 7492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:38.923284', 'step': 7492, 'epoch': 2} {'type': 'loss', 'content': 0.2350587695837021, 'timestamp': '2025-10-01 04:26:38.925555', 'step': 7493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:38.980080', 'step': 7493, 'epoch': 2} {'type': 'loss', 'content': 0.13092957437038422, 'timestamp': '2025-10-01 04:26:38.982468', 'step': 7494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:39.037135', 'step': 7494, 'epoch': 2} {'type': 'loss', 'content': 0.22647543251514435, 'timestamp': '2025-10-01 04:26:39.039828', 'step': 7495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:39.093574', 'step': 7495, 'epoch': 2} {'type': 'loss', 'content': 0.0851021483540535, 'timestamp': '2025-10-01 04:26:39.099613', 'step': 7496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:39.153482', 'step': 7496, 'epoch': 2} {'type': 'loss', 'content': 0.16627894341945648, 'timestamp': '2025-10-01 04:26:39.155608', 'step': 7497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:39.209761', 'step': 7497, 'epoch': 2} {'type': 'loss', 'content': 0.10586319118738174, 'timestamp': '2025-10-01 04:26:39.212114', 'step': 7498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:39.268055', 'step': 7498, 'epoch': 2} {'type': 'loss', 'content': 0.1732705980539322, 'timestamp': '2025-10-01 04:26:39.270508', 'step': 7499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:39.324133', 'step': 7499, 'epoch': 2} {'type': 'loss', 'content': 0.14571528136730194, 'timestamp': '2025-10-01 04:26:39.330036', 'step': 7500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 7500', 'timestamp': '2025-10-01 04:26:39.713539', 'step': 7500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:39.770111', 'step': 7500, 'epoch': 2} {'type': 'loss', 'content': 0.11435997486114502, 'timestamp': '2025-10-01 04:26:39.772532', 'step': 7501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:39.829257', 'step': 7501, 'epoch': 2} {'type': 'loss', 'content': 0.13176797330379486, 'timestamp': '2025-10-01 04:26:39.831507', 'step': 7502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:39.886296', 'step': 7502, 'epoch': 2} {'type': 'loss', 'content': 0.13943222165107727, 'timestamp': '2025-10-01 04:26:39.888562', 'step': 7503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:39.952958', 'step': 7503, 'epoch': 2} {'type': 'loss', 'content': 0.15173478424549103, 'timestamp': '2025-10-01 04:26:39.958848', 'step': 7504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:40.013568', 'step': 7504, 'epoch': 2} {'type': 'loss', 'content': 0.19624093174934387, 'timestamp': '2025-10-01 04:26:40.016023', 'step': 7505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:40.076454', 'step': 7505, 'epoch': 2} {'type': 'loss', 'content': 0.15154649317264557, 'timestamp': '2025-10-01 04:26:40.078940', 'step': 7506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:40.134306', 'step': 7506, 'epoch': 2} {'type': 'loss', 'content': 0.14368875324726105, 'timestamp': '2025-10-01 04:26:40.136582', 'step': 7507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:40.191001', 'step': 7507, 'epoch': 2} {'type': 'loss', 'content': 0.21715392172336578, 'timestamp': '2025-10-01 04:26:40.196812', 'step': 7508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:40.252388', 'step': 7508, 'epoch': 2} {'type': 'loss', 'content': 0.18358305096626282, 'timestamp': '2025-10-01 04:26:40.256595', 'step': 7509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:40.313612', 'step': 7509, 'epoch': 2} {'type': 'loss', 'content': 0.09981139004230499, 'timestamp': '2025-10-01 04:26:40.316492', 'step': 7510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:40.373491', 'step': 7510, 'epoch': 2} {'type': 'loss', 'content': 0.17646270990371704, 'timestamp': '2025-10-01 04:26:40.375948', 'step': 7511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:40.431452', 'step': 7511, 'epoch': 2} {'type': 'loss', 'content': 0.24262510240077972, 'timestamp': '2025-10-01 04:26:40.437524', 'step': 7512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:40.506102', 'step': 7512, 'epoch': 2} {'type': 'loss', 'content': 0.12151068449020386, 'timestamp': '2025-10-01 04:26:40.508022', 'step': 7513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:40.561810', 'step': 7513, 'epoch': 2} {'type': 'loss', 'content': 0.11013802140951157, 'timestamp': '2025-10-01 04:26:40.564049', 'step': 7514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:40.617021', 'step': 7514, 'epoch': 2} {'type': 'loss', 'content': 0.15380854904651642, 'timestamp': '2025-10-01 04:26:40.619174', 'step': 7515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:26:40.672021', 'step': 7515, 'epoch': 2} {'type': 'loss', 'content': 0.09495712071657181, 'timestamp': '2025-10-01 04:26:40.678161', 'step': 7516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:40.731003', 'step': 7516, 'epoch': 2} {'type': 'loss', 'content': 0.09845782071352005, 'timestamp': '2025-10-01 04:26:40.733242', 'step': 7517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:40.786737', 'step': 7517, 'epoch': 2} {'type': 'loss', 'content': 0.18823951482772827, 'timestamp': '2025-10-01 04:26:40.788702', 'step': 7518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:40.841643', 'step': 7518, 'epoch': 2} {'type': 'loss', 'content': 0.16261211037635803, 'timestamp': '2025-10-01 04:26:40.844772', 'step': 7519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:26:40.898291', 'step': 7519, 'epoch': 2} {'type': 'loss', 'content': 0.15367057919502258, 'timestamp': '2025-10-01 04:26:40.903946', 'step': 7520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:40.956583', 'step': 7520, 'epoch': 2} {'type': 'loss', 'content': 0.13157202303409576, 'timestamp': '2025-10-01 04:26:40.958601', 'step': 7521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:41.011472', 'step': 7521, 'epoch': 2} {'type': 'loss', 'content': 0.12216322124004364, 'timestamp': '2025-10-01 04:26:41.013585', 'step': 7522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:41.070021', 'step': 7522, 'epoch': 2} {'type': 'loss', 'content': 0.2127542644739151, 'timestamp': '2025-10-01 04:26:41.072220', 'step': 7523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:41.126912', 'step': 7523, 'epoch': 2} {'type': 'loss', 'content': 0.1050167977809906, 'timestamp': '2025-10-01 04:26:41.133470', 'step': 7524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:41.185995', 'step': 7524, 'epoch': 2} {'type': 'loss', 'content': 0.0661199614405632, 'timestamp': '2025-10-01 04:26:41.187905', 'step': 7525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:26:41.240393', 'step': 7525, 'epoch': 2} {'type': 'loss', 'content': 0.1538950502872467, 'timestamp': '2025-10-01 04:26:41.242963', 'step': 7526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:41.295982', 'step': 7526, 'epoch': 2} {'type': 'loss', 'content': 0.10804685950279236, 'timestamp': '2025-10-01 04:26:41.298053', 'step': 7527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:41.363108', 'step': 7527, 'epoch': 2} {'type': 'loss', 'content': 0.13154809176921844, 'timestamp': '2025-10-01 04:26:41.368606', 'step': 7528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:41.422157', 'step': 7528, 'epoch': 2} {'type': 'loss', 'content': 0.1975700408220291, 'timestamp': '2025-10-01 04:26:41.424062', 'step': 7529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:41.476508', 'step': 7529, 'epoch': 2} {'type': 'loss', 'content': 0.2228030115365982, 'timestamp': '2025-10-01 04:26:41.479130', 'step': 7530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:41.532013', 'step': 7530, 'epoch': 2} {'type': 'loss', 'content': 0.11925532668828964, 'timestamp': '2025-10-01 04:26:41.534145', 'step': 7531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:41.588426', 'step': 7531, 'epoch': 2} {'type': 'loss', 'content': 0.11577384173870087, 'timestamp': '2025-10-01 04:26:41.598486', 'step': 7532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:41.651790', 'step': 7532, 'epoch': 2} {'type': 'loss', 'content': 0.1635282039642334, 'timestamp': '2025-10-01 04:26:41.653883', 'step': 7533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:41.706725', 'step': 7533, 'epoch': 2} {'type': 'loss', 'content': 0.08108767122030258, 'timestamp': '2025-10-01 04:26:41.708876', 'step': 7534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:41.761492', 'step': 7534, 'epoch': 2} {'type': 'loss', 'content': 0.14550495147705078, 'timestamp': '2025-10-01 04:26:41.764056', 'step': 7535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:41.817288', 'step': 7535, 'epoch': 2} {'type': 'loss', 'content': 0.26143792271614075, 'timestamp': '2025-10-01 04:26:41.822889', 'step': 7536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:41.874980', 'step': 7536, 'epoch': 2} {'type': 'loss', 'content': 0.10834639519453049, 'timestamp': '2025-10-01 04:26:41.877018', 'step': 7537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:41.930183', 'step': 7537, 'epoch': 2} {'type': 'loss', 'content': 0.13512146472930908, 'timestamp': '2025-10-01 04:26:41.932302', 'step': 7538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:41.987009', 'step': 7538, 'epoch': 2} {'type': 'loss', 'content': 0.1348550170660019, 'timestamp': '2025-10-01 04:26:41.989251', 'step': 7539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:42.043159', 'step': 7539, 'epoch': 2} {'type': 'loss', 'content': 0.1541173905134201, 'timestamp': '2025-10-01 04:26:42.049021', 'step': 7540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:26:42.102471', 'step': 7540, 'epoch': 2} {'type': 'loss', 'content': 0.12248256802558899, 'timestamp': '2025-10-01 04:26:42.104619', 'step': 7541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:42.159319', 'step': 7541, 'epoch': 2} {'type': 'loss', 'content': 0.1603388637304306, 'timestamp': '2025-10-01 04:26:42.161173', 'step': 7542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:42.214808', 'step': 7542, 'epoch': 2} {'type': 'loss', 'content': 0.1293775588274002, 'timestamp': '2025-10-01 04:26:42.217339', 'step': 7543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:42.272255', 'step': 7543, 'epoch': 2} {'type': 'loss', 'content': 0.1313719004392624, 'timestamp': '2025-10-01 04:26:42.278440', 'step': 7544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:42.332829', 'step': 7544, 'epoch': 2} {'type': 'loss', 'content': 0.10345403850078583, 'timestamp': '2025-10-01 04:26:42.334680', 'step': 7545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:42.387658', 'step': 7545, 'epoch': 2} {'type': 'loss', 'content': 0.11455054581165314, 'timestamp': '2025-10-01 04:26:42.390118', 'step': 7546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:42.443996', 'step': 7546, 'epoch': 2} {'type': 'loss', 'content': 0.17408592998981476, 'timestamp': '2025-10-01 04:26:42.446352', 'step': 7547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:42.500210', 'step': 7547, 'epoch': 2} {'type': 'loss', 'content': 0.22731487452983856, 'timestamp': '2025-10-01 04:26:42.506605', 'step': 7548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:42.559496', 'step': 7548, 'epoch': 2} {'type': 'loss', 'content': 0.08960124850273132, 'timestamp': '2025-10-01 04:26:42.561438', 'step': 7549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:42.614410', 'step': 7549, 'epoch': 2} {'type': 'loss', 'content': 0.26617664098739624, 'timestamp': '2025-10-01 04:26:42.616826', 'step': 7550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:42.672310', 'step': 7550, 'epoch': 2} {'type': 'loss', 'content': 0.09557810425758362, 'timestamp': '2025-10-01 04:26:42.674521', 'step': 7551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:42.727833', 'step': 7551, 'epoch': 2} {'type': 'loss', 'content': 0.11717705428600311, 'timestamp': '2025-10-01 04:26:42.734683', 'step': 7552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:42.787585', 'step': 7552, 'epoch': 2} {'type': 'loss', 'content': 0.1452472060918808, 'timestamp': '2025-10-01 04:26:42.789814', 'step': 7553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:26:42.842737', 'step': 7553, 'epoch': 2} {'type': 'loss', 'content': 0.10296772420406342, 'timestamp': '2025-10-01 04:26:42.844855', 'step': 7554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:42.898258', 'step': 7554, 'epoch': 2} {'type': 'loss', 'content': 0.11187753081321716, 'timestamp': '2025-10-01 04:26:42.900132', 'step': 7555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:42.952920', 'step': 7555, 'epoch': 2} {'type': 'loss', 'content': 0.18285536766052246, 'timestamp': '2025-10-01 04:26:42.958532', 'step': 7556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:43.026188', 'step': 7556, 'epoch': 2} {'type': 'loss', 'content': 0.1336032897233963, 'timestamp': '2025-10-01 04:26:43.028287', 'step': 7557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:43.081165', 'step': 7557, 'epoch': 2} {'type': 'loss', 'content': 0.15825624763965607, 'timestamp': '2025-10-01 04:26:43.083304', 'step': 7558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:43.137090', 'step': 7558, 'epoch': 2} {'type': 'loss', 'content': 0.127871572971344, 'timestamp': '2025-10-01 04:26:43.139418', 'step': 7559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:43.192822', 'step': 7559, 'epoch': 2} {'type': 'loss', 'content': 0.1660519242286682, 'timestamp': '2025-10-01 04:26:43.198567', 'step': 7560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:43.262860', 'step': 7560, 'epoch': 2} {'type': 'loss', 'content': 0.1863185167312622, 'timestamp': '2025-10-01 04:26:43.265663', 'step': 7561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:43.319415', 'step': 7561, 'epoch': 2} {'type': 'loss', 'content': 0.13718324899673462, 'timestamp': '2025-10-01 04:26:43.321769', 'step': 7562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:43.375209', 'step': 7562, 'epoch': 2} {'type': 'loss', 'content': 0.1968596875667572, 'timestamp': '2025-10-01 04:26:43.377301', 'step': 7563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:43.434125', 'step': 7563, 'epoch': 2} {'type': 'loss', 'content': 0.17954765260219574, 'timestamp': '2025-10-01 04:26:43.440051', 'step': 7564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:43.493149', 'step': 7564, 'epoch': 2} {'type': 'loss', 'content': 0.1676090806722641, 'timestamp': '2025-10-01 04:26:43.495270', 'step': 7565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:43.549753', 'step': 7565, 'epoch': 2} {'type': 'loss', 'content': 0.14212512969970703, 'timestamp': '2025-10-01 04:26:43.559075', 'step': 7566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:43.628377', 'step': 7566, 'epoch': 2} {'type': 'loss', 'content': 0.15913116931915283, 'timestamp': '2025-10-01 04:26:43.630720', 'step': 7567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:43.687238', 'step': 7567, 'epoch': 2} {'type': 'loss', 'content': 0.1401071846485138, 'timestamp': '2025-10-01 04:26:43.694088', 'step': 7568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:26:43.750589', 'step': 7568, 'epoch': 2} {'type': 'loss', 'content': 0.10998676717281342, 'timestamp': '2025-10-01 04:26:43.752853', 'step': 7569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:43.810606', 'step': 7569, 'epoch': 2} {'type': 'loss', 'content': 0.21873871982097626, 'timestamp': '2025-10-01 04:26:43.812773', 'step': 7570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:43.870004', 'step': 7570, 'epoch': 2} {'type': 'loss', 'content': 0.10422211140394211, 'timestamp': '2025-10-01 04:26:43.872184', 'step': 7571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:43.929245', 'step': 7571, 'epoch': 2} {'type': 'loss', 'content': 0.09112770110368729, 'timestamp': '2025-10-01 04:26:43.935927', 'step': 7572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:43.992810', 'step': 7572, 'epoch': 2} {'type': 'loss', 'content': 0.15237459540367126, 'timestamp': '2025-10-01 04:26:43.995008', 'step': 7573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:44.050954', 'step': 7573, 'epoch': 2} {'type': 'loss', 'content': 0.2664529085159302, 'timestamp': '2025-10-01 04:26:44.053208', 'step': 7574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:26:44.111976', 'step': 7574, 'epoch': 2} {'type': 'loss', 'content': 0.1042105183005333, 'timestamp': '2025-10-01 04:26:44.114310', 'step': 7575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:44.172265', 'step': 7575, 'epoch': 2} {'type': 'loss', 'content': 0.21021346747875214, 'timestamp': '2025-10-01 04:26:44.178967', 'step': 7576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:44.233589', 'step': 7576, 'epoch': 2} {'type': 'loss', 'content': 0.1618064045906067, 'timestamp': '2025-10-01 04:26:44.235595', 'step': 7577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:44.290190', 'step': 7577, 'epoch': 2} {'type': 'loss', 'content': 0.18443194031715393, 'timestamp': '2025-10-01 04:26:44.292500', 'step': 7578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:44.346877', 'step': 7578, 'epoch': 2} {'type': 'loss', 'content': 0.10083608329296112, 'timestamp': '2025-10-01 04:26:44.349097', 'step': 7579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:44.403518', 'step': 7579, 'epoch': 2} {'type': 'loss', 'content': 0.25429877638816833, 'timestamp': '2025-10-01 04:26:44.411001', 'step': 7580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:44.464775', 'step': 7580, 'epoch': 2} {'type': 'loss', 'content': 0.1668137013912201, 'timestamp': '2025-10-01 04:26:44.467407', 'step': 7581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:26:44.521385', 'step': 7581, 'epoch': 2} {'type': 'loss', 'content': 0.14240793883800507, 'timestamp': '2025-10-01 04:26:44.523899', 'step': 7582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:26:44.577058', 'step': 7582, 'epoch': 2} {'type': 'loss', 'content': 0.2509312927722931, 'timestamp': '2025-10-01 04:26:44.579915', 'step': 7583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:44.647438', 'step': 7583, 'epoch': 2} {'type': 'loss', 'content': 0.11361831426620483, 'timestamp': '2025-10-01 04:26:44.654187', 'step': 7584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:44.707988', 'step': 7584, 'epoch': 2} {'type': 'loss', 'content': 0.1037483662366867, 'timestamp': '2025-10-01 04:26:44.710054', 'step': 7585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:44.763195', 'step': 7585, 'epoch': 2} {'type': 'loss', 'content': 0.10026472061872482, 'timestamp': '2025-10-01 04:26:44.765882', 'step': 7586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:44.819818', 'step': 7586, 'epoch': 2} {'type': 'loss', 'content': 0.15496987104415894, 'timestamp': '2025-10-01 04:26:44.821827', 'step': 7587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:44.875754', 'step': 7587, 'epoch': 2} {'type': 'loss', 'content': 0.3058789074420929, 'timestamp': '2025-10-01 04:26:44.881627', 'step': 7588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:44.936822', 'step': 7588, 'epoch': 2} {'type': 'loss', 'content': 0.17705516517162323, 'timestamp': '2025-10-01 04:26:44.938937', 'step': 7589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:44.993184', 'step': 7589, 'epoch': 2} {'type': 'loss', 'content': 0.15700195729732513, 'timestamp': '2025-10-01 04:26:44.996957', 'step': 7590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:45.050309', 'step': 7590, 'epoch': 2} {'type': 'loss', 'content': 0.10848486423492432, 'timestamp': '2025-10-01 04:26:45.052702', 'step': 7591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:45.105896', 'step': 7591, 'epoch': 2} {'type': 'loss', 'content': 0.1588600128889084, 'timestamp': '2025-10-01 04:26:45.112003', 'step': 7592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:45.164482', 'step': 7592, 'epoch': 2} {'type': 'loss', 'content': 0.21167853474617004, 'timestamp': '2025-10-01 04:26:45.166675', 'step': 7593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:45.220415', 'step': 7593, 'epoch': 2} {'type': 'loss', 'content': 0.23293447494506836, 'timestamp': '2025-10-01 04:26:45.222494', 'step': 7594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:45.276650', 'step': 7594, 'epoch': 2} {'type': 'loss', 'content': 0.14515304565429688, 'timestamp': '2025-10-01 04:26:45.279372', 'step': 7595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:45.333567', 'step': 7595, 'epoch': 2} {'type': 'loss', 'content': 0.13865813612937927, 'timestamp': '2025-10-01 04:26:45.339590', 'step': 7596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:45.392307', 'step': 7596, 'epoch': 2} {'type': 'loss', 'content': 0.17898526787757874, 'timestamp': '2025-10-01 04:26:45.394559', 'step': 7597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:45.447960', 'step': 7597, 'epoch': 2} {'type': 'loss', 'content': 0.14610961079597473, 'timestamp': '2025-10-01 04:26:45.450017', 'step': 7598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:45.503456', 'step': 7598, 'epoch': 2} {'type': 'loss', 'content': 0.1591779738664627, 'timestamp': '2025-10-01 04:26:45.505571', 'step': 7599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:26:45.559408', 'step': 7599, 'epoch': 2} {'type': 'loss', 'content': 0.21833346784114838, 'timestamp': '2025-10-01 04:26:45.565126', 'step': 7600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:45.618878', 'step': 7600, 'epoch': 2} {'type': 'loss', 'content': 0.1577925980091095, 'timestamp': '2025-10-01 04:26:45.621219', 'step': 7601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:45.676382', 'step': 7601, 'epoch': 2} {'type': 'loss', 'content': 0.1594124585390091, 'timestamp': '2025-10-01 04:26:45.678570', 'step': 7602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:45.734471', 'step': 7602, 'epoch': 2} {'type': 'loss', 'content': 0.19507132470607758, 'timestamp': '2025-10-01 04:26:45.736754', 'step': 7603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:45.789318', 'step': 7603, 'epoch': 2} {'type': 'loss', 'content': 0.15715278685092926, 'timestamp': '2025-10-01 04:26:45.795097', 'step': 7604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:45.848942', 'step': 7604, 'epoch': 2} {'type': 'loss', 'content': 0.13099803030490875, 'timestamp': '2025-10-01 04:26:45.851122', 'step': 7605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:45.904984', 'step': 7605, 'epoch': 2} {'type': 'loss', 'content': 0.20340774953365326, 'timestamp': '2025-10-01 04:26:45.907613', 'step': 7606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:45.969887', 'step': 7606, 'epoch': 2} {'type': 'loss', 'content': 0.1323917806148529, 'timestamp': '2025-10-01 04:26:45.972008', 'step': 7607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:46.025734', 'step': 7607, 'epoch': 2} {'type': 'loss', 'content': 0.14329063892364502, 'timestamp': '2025-10-01 04:26:46.031657', 'step': 7608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:26:46.085223', 'step': 7608, 'epoch': 2} {'type': 'loss', 'content': 0.0834861695766449, 'timestamp': '2025-10-01 04:26:46.087417', 'step': 7609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:46.142544', 'step': 7609, 'epoch': 2} {'type': 'loss', 'content': 0.14895924925804138, 'timestamp': '2025-10-01 04:26:46.145229', 'step': 7610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:26:46.200285', 'step': 7610, 'epoch': 2} {'type': 'loss', 'content': 0.10360141843557358, 'timestamp': '2025-10-01 04:26:46.202539', 'step': 7611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:26:46.256128', 'step': 7611, 'epoch': 2} {'type': 'loss', 'content': 0.09789302945137024, 'timestamp': '2025-10-01 04:26:46.262054', 'step': 7612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:46.321179', 'step': 7612, 'epoch': 2} {'type': 'loss', 'content': 0.13175268471240997, 'timestamp': '2025-10-01 04:26:46.323351', 'step': 7613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:46.393007', 'step': 7613, 'epoch': 2} {'type': 'loss', 'content': 0.18088267743587494, 'timestamp': '2025-10-01 04:26:46.395267', 'step': 7614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:26:46.449192', 'step': 7614, 'epoch': 2} {'type': 'loss', 'content': 0.2775377929210663, 'timestamp': '2025-10-01 04:26:46.452113', 'step': 7615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:46.510536', 'step': 7615, 'epoch': 2} {'type': 'loss', 'content': 0.18118372559547424, 'timestamp': '2025-10-01 04:26:46.516365', 'step': 7616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:46.569223', 'step': 7616, 'epoch': 2} {'type': 'loss', 'content': 0.09675899893045425, 'timestamp': '2025-10-01 04:26:46.571366', 'step': 7617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:46.624743', 'step': 7617, 'epoch': 2} {'type': 'loss', 'content': 0.1829572170972824, 'timestamp': '2025-10-01 04:26:46.626944', 'step': 7618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:46.679809', 'step': 7618, 'epoch': 2} {'type': 'loss', 'content': 0.11892986297607422, 'timestamp': '2025-10-01 04:26:46.682905', 'step': 7619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:46.735600', 'step': 7619, 'epoch': 2} {'type': 'loss', 'content': 0.15393196046352386, 'timestamp': '2025-10-01 04:26:46.741521', 'step': 7620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:26:46.794875', 'step': 7620, 'epoch': 2} {'type': 'loss', 'content': 0.1847970187664032, 'timestamp': '2025-10-01 04:26:46.796963', 'step': 7621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:46.850548', 'step': 7621, 'epoch': 2} {'type': 'loss', 'content': 0.23121491074562073, 'timestamp': '2025-10-01 04:26:46.852695', 'step': 7622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:46.906220', 'step': 7622, 'epoch': 2} {'type': 'loss', 'content': 0.13149119913578033, 'timestamp': '2025-10-01 04:26:46.908307', 'step': 7623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:46.962213', 'step': 7623, 'epoch': 2} {'type': 'loss', 'content': 0.1267029196023941, 'timestamp': '2025-10-01 04:26:46.968980', 'step': 7624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:47.022093', 'step': 7624, 'epoch': 2} {'type': 'loss', 'content': 0.20491810142993927, 'timestamp': '2025-10-01 04:26:47.025118', 'step': 7625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:26:47.078744', 'step': 7625, 'epoch': 2} {'type': 'loss', 'content': 0.11086485534906387, 'timestamp': '2025-10-01 04:26:47.080805', 'step': 7626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:47.134729', 'step': 7626, 'epoch': 2} {'type': 'loss', 'content': 0.07758215814828873, 'timestamp': '2025-10-01 04:26:47.136893', 'step': 7627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:47.193432', 'step': 7627, 'epoch': 2} {'type': 'loss', 'content': 0.12611374258995056, 'timestamp': '2025-10-01 04:26:47.199190', 'step': 7628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:47.251714', 'step': 7628, 'epoch': 2} {'type': 'loss', 'content': 0.08764722943305969, 'timestamp': '2025-10-01 04:26:47.270099', 'step': 7629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:47.357638', 'step': 7629, 'epoch': 2} {'type': 'loss', 'content': 0.1299217939376831, 'timestamp': '2025-10-01 04:26:47.365040', 'step': 7630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:47.446712', 'step': 7630, 'epoch': 2} {'type': 'loss', 'content': 0.11069928854703903, 'timestamp': '2025-10-01 04:26:47.453059', 'step': 7631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:47.540433', 'step': 7631, 'epoch': 2} {'type': 'loss', 'content': 0.1829521209001541, 'timestamp': '2025-10-01 04:26:47.548734', 'step': 7632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:47.633156', 'step': 7632, 'epoch': 2} {'type': 'loss', 'content': 0.2444123476743698, 'timestamp': '2025-10-01 04:26:47.655498', 'step': 7633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:47.731779', 'step': 7633, 'epoch': 2} {'type': 'loss', 'content': 0.16412603855133057, 'timestamp': '2025-10-01 04:26:47.744865', 'step': 7634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:47.837708', 'step': 7634, 'epoch': 2} {'type': 'loss', 'content': 0.1911480724811554, 'timestamp': '2025-10-01 04:26:47.853279', 'step': 7635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:47.932908', 'step': 7635, 'epoch': 2} {'type': 'loss', 'content': 0.2537257671356201, 'timestamp': '2025-10-01 04:26:47.955182', 'step': 7636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:48.045237', 'step': 7636, 'epoch': 2} {'type': 'loss', 'content': 0.18879404664039612, 'timestamp': '2025-10-01 04:26:48.052912', 'step': 7637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:48.140861', 'step': 7637, 'epoch': 2} {'type': 'loss', 'content': 0.06485435366630554, 'timestamp': '2025-10-01 04:26:48.150916', 'step': 7638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:48.259527', 'step': 7638, 'epoch': 2} {'type': 'loss', 'content': 0.20439393818378448, 'timestamp': '2025-10-01 04:26:48.269451', 'step': 7639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:48.361341', 'step': 7639, 'epoch': 2} {'type': 'loss', 'content': 0.13316960632801056, 'timestamp': '2025-10-01 04:26:48.376886', 'step': 7640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:26:48.462753', 'step': 7640, 'epoch': 2} {'type': 'loss', 'content': 0.15252742171287537, 'timestamp': '2025-10-01 04:26:48.472412', 'step': 7641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:48.578820', 'step': 7641, 'epoch': 2} {'type': 'loss', 'content': 0.1311456561088562, 'timestamp': '2025-10-01 04:26:48.581258', 'step': 7642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:48.635240', 'step': 7642, 'epoch': 2} {'type': 'loss', 'content': 0.19191718101501465, 'timestamp': '2025-10-01 04:26:48.651725', 'step': 7643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:48.706486', 'step': 7643, 'epoch': 2} {'type': 'loss', 'content': 0.11307930946350098, 'timestamp': '2025-10-01 04:26:48.712477', 'step': 7644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:26:48.766432', 'step': 7644, 'epoch': 2} {'type': 'loss', 'content': 0.17308783531188965, 'timestamp': '2025-10-01 04:26:48.768965', 'step': 7645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:48.824039', 'step': 7645, 'epoch': 2} {'type': 'loss', 'content': 0.09029921144247055, 'timestamp': '2025-10-01 04:26:48.826754', 'step': 7646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:48.881484', 'step': 7646, 'epoch': 2} {'type': 'loss', 'content': 0.128029465675354, 'timestamp': '2025-10-01 04:26:48.886855', 'step': 7647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:48.943467', 'step': 7647, 'epoch': 2} {'type': 'loss', 'content': 0.15882264077663422, 'timestamp': '2025-10-01 04:26:48.949489', 'step': 7648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:26:49.005816', 'step': 7648, 'epoch': 2} {'type': 'loss', 'content': 0.17893867194652557, 'timestamp': '2025-10-01 04:26:49.007848', 'step': 7649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:49.060929', 'step': 7649, 'epoch': 2} {'type': 'loss', 'content': 0.21953162550926208, 'timestamp': '2025-10-01 04:26:49.063176', 'step': 7650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:49.132776', 'step': 7650, 'epoch': 2} {'type': 'loss', 'content': 0.08077652007341385, 'timestamp': '2025-10-01 04:26:49.135785', 'step': 7651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:49.191402', 'step': 7651, 'epoch': 2} {'type': 'loss', 'content': 0.25694164633750916, 'timestamp': '2025-10-01 04:26:49.197293', 'step': 7652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:49.250257', 'step': 7652, 'epoch': 2} {'type': 'loss', 'content': 0.1691245287656784, 'timestamp': '2025-10-01 04:26:49.252564', 'step': 7653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:49.315562', 'step': 7653, 'epoch': 2} {'type': 'loss', 'content': 0.18659380078315735, 'timestamp': '2025-10-01 04:26:49.317812', 'step': 7654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:49.372262', 'step': 7654, 'epoch': 2} {'type': 'loss', 'content': 0.1473909616470337, 'timestamp': '2025-10-01 04:26:49.376281', 'step': 7655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:49.429951', 'step': 7655, 'epoch': 2} {'type': 'loss', 'content': 0.13385063409805298, 'timestamp': '2025-10-01 04:26:49.436080', 'step': 7656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:49.491677', 'step': 7656, 'epoch': 2} {'type': 'loss', 'content': 0.16063031554222107, 'timestamp': '2025-10-01 04:26:49.493767', 'step': 7657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:49.547611', 'step': 7657, 'epoch': 2} {'type': 'loss', 'content': 0.1911744624376297, 'timestamp': '2025-10-01 04:26:49.549863', 'step': 7658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:49.603189', 'step': 7658, 'epoch': 2} {'type': 'loss', 'content': 0.07745476067066193, 'timestamp': '2025-10-01 04:26:49.607487', 'step': 7659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:49.662012', 'step': 7659, 'epoch': 2} {'type': 'loss', 'content': 0.12274207174777985, 'timestamp': '2025-10-01 04:26:49.669830', 'step': 7660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:49.723786', 'step': 7660, 'epoch': 2} {'type': 'loss', 'content': 0.2394774705171585, 'timestamp': '2025-10-01 04:26:49.726310', 'step': 7661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:49.781968', 'step': 7661, 'epoch': 2} {'type': 'loss', 'content': 0.0985000729560852, 'timestamp': '2025-10-01 04:26:49.784467', 'step': 7662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:49.847053', 'step': 7662, 'epoch': 2} {'type': 'loss', 'content': 0.12956871092319489, 'timestamp': '2025-10-01 04:26:49.849435', 'step': 7663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:49.906475', 'step': 7663, 'epoch': 2} {'type': 'loss', 'content': 0.1897723227739334, 'timestamp': '2025-10-01 04:26:49.912729', 'step': 7664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:49.965795', 'step': 7664, 'epoch': 2} {'type': 'loss', 'content': 0.21966144442558289, 'timestamp': '2025-10-01 04:26:49.969187', 'step': 7665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:50.023803', 'step': 7665, 'epoch': 2} {'type': 'loss', 'content': 0.11625241488218307, 'timestamp': '2025-10-01 04:26:50.025985', 'step': 7666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:50.080102', 'step': 7666, 'epoch': 2} {'type': 'loss', 'content': 0.18942923843860626, 'timestamp': '2025-10-01 04:26:50.082633', 'step': 7667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:50.136258', 'step': 7667, 'epoch': 2} {'type': 'loss', 'content': 0.15044116973876953, 'timestamp': '2025-10-01 04:26:50.142087', 'step': 7668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:50.203050', 'step': 7668, 'epoch': 2} {'type': 'loss', 'content': 0.17443037033081055, 'timestamp': '2025-10-01 04:26:50.205354', 'step': 7669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:50.258650', 'step': 7669, 'epoch': 2} {'type': 'loss', 'content': 0.2979857325553894, 'timestamp': '2025-10-01 04:26:50.263642', 'step': 7670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:50.317120', 'step': 7670, 'epoch': 2} {'type': 'loss', 'content': 0.134971484541893, 'timestamp': '2025-10-01 04:26:50.319430', 'step': 7671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:50.372903', 'step': 7671, 'epoch': 2} {'type': 'loss', 'content': 0.1648888736963272, 'timestamp': '2025-10-01 04:26:50.378565', 'step': 7672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:50.435109', 'step': 7672, 'epoch': 2} {'type': 'loss', 'content': 0.13549493253231049, 'timestamp': '2025-10-01 04:26:50.437638', 'step': 7673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:50.500847', 'step': 7673, 'epoch': 2} {'type': 'loss', 'content': 0.24728791415691376, 'timestamp': '2025-10-01 04:26:50.502955', 'step': 7674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:50.556805', 'step': 7674, 'epoch': 2} {'type': 'loss', 'content': 0.1264980435371399, 'timestamp': '2025-10-01 04:26:50.558931', 'step': 7675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:50.612535', 'step': 7675, 'epoch': 2} {'type': 'loss', 'content': 0.23642151057720184, 'timestamp': '2025-10-01 04:26:50.618142', 'step': 7676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:50.670806', 'step': 7676, 'epoch': 2} {'type': 'loss', 'content': 0.17216815054416656, 'timestamp': '2025-10-01 04:26:50.673210', 'step': 7677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:50.726189', 'step': 7677, 'epoch': 2} {'type': 'loss', 'content': 0.15969300270080566, 'timestamp': '2025-10-01 04:26:50.728683', 'step': 7678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:50.782394', 'step': 7678, 'epoch': 2} {'type': 'loss', 'content': 0.18843252956867218, 'timestamp': '2025-10-01 04:26:50.784939', 'step': 7679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:50.841805', 'step': 7679, 'epoch': 2} {'type': 'loss', 'content': 0.13584230840206146, 'timestamp': '2025-10-01 04:26:50.848427', 'step': 7680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:50.908694', 'step': 7680, 'epoch': 2} {'type': 'loss', 'content': 0.18532481789588928, 'timestamp': '2025-10-01 04:26:50.911917', 'step': 7681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:50.970792', 'step': 7681, 'epoch': 2} {'type': 'loss', 'content': 0.08388892561197281, 'timestamp': '2025-10-01 04:26:50.973009', 'step': 7682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:51.033759', 'step': 7682, 'epoch': 2} {'type': 'loss', 'content': 0.19859440624713898, 'timestamp': '2025-10-01 04:26:51.036129', 'step': 7683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:51.120006', 'step': 7683, 'epoch': 2} {'type': 'loss', 'content': 0.08207564800977707, 'timestamp': '2025-10-01 04:26:51.125806', 'step': 7684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:51.193279', 'step': 7684, 'epoch': 2} {'type': 'loss', 'content': 0.11896025389432907, 'timestamp': '2025-10-01 04:26:51.195483', 'step': 7685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:51.249216', 'step': 7685, 'epoch': 2} {'type': 'loss', 'content': 0.1622212678194046, 'timestamp': '2025-10-01 04:26:51.251247', 'step': 7686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:51.304342', 'step': 7686, 'epoch': 2} {'type': 'loss', 'content': 0.17983116209506989, 'timestamp': '2025-10-01 04:26:51.306419', 'step': 7687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:51.359659', 'step': 7687, 'epoch': 2} {'type': 'loss', 'content': 0.14495494961738586, 'timestamp': '2025-10-01 04:26:51.366262', 'step': 7688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:51.419466', 'step': 7688, 'epoch': 2} {'type': 'loss', 'content': 0.1760949343442917, 'timestamp': '2025-10-01 04:26:51.421673', 'step': 7689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:51.475324', 'step': 7689, 'epoch': 2} {'type': 'loss', 'content': 0.10548228770494461, 'timestamp': '2025-10-01 04:26:51.489463', 'step': 7690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:51.553651', 'step': 7690, 'epoch': 2} {'type': 'loss', 'content': 0.13506552577018738, 'timestamp': '2025-10-01 04:26:51.556242', 'step': 7691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:51.609624', 'step': 7691, 'epoch': 2} {'type': 'loss', 'content': 0.21683062613010406, 'timestamp': '2025-10-01 04:26:51.615280', 'step': 7692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:51.669152', 'step': 7692, 'epoch': 2} {'type': 'loss', 'content': 0.16747790575027466, 'timestamp': '2025-10-01 04:26:51.671169', 'step': 7693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:51.724625', 'step': 7693, 'epoch': 2} {'type': 'loss', 'content': 0.1295221447944641, 'timestamp': '2025-10-01 04:26:51.726941', 'step': 7694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:26:51.781453', 'step': 7694, 'epoch': 2} {'type': 'loss', 'content': 0.1994956135749817, 'timestamp': '2025-10-01 04:26:51.783412', 'step': 7695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:26:51.836888', 'step': 7695, 'epoch': 2} {'type': 'loss', 'content': 0.10599008947610855, 'timestamp': '2025-10-01 04:26:51.842876', 'step': 7696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:51.895477', 'step': 7696, 'epoch': 2} {'type': 'loss', 'content': 0.18382030725479126, 'timestamp': '2025-10-01 04:26:51.897793', 'step': 7697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:51.951664', 'step': 7697, 'epoch': 2} {'type': 'loss', 'content': 0.26312199234962463, 'timestamp': '2025-10-01 04:26:51.954146', 'step': 7698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:52.007566', 'step': 7698, 'epoch': 2} {'type': 'loss', 'content': 0.15390373766422272, 'timestamp': '2025-10-01 04:26:52.010824', 'step': 7699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:52.064790', 'step': 7699, 'epoch': 2} {'type': 'loss', 'content': 0.09317639470100403, 'timestamp': '2025-10-01 04:26:52.070621', 'step': 7700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:52.131942', 'step': 7700, 'epoch': 2} {'type': 'loss', 'content': 0.21498581767082214, 'timestamp': '2025-10-01 04:26:52.133916', 'step': 7701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:52.187541', 'step': 7701, 'epoch': 2} {'type': 'loss', 'content': 0.11651597172021866, 'timestamp': '2025-10-01 04:26:52.189531', 'step': 7702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:52.242872', 'step': 7702, 'epoch': 2} {'type': 'loss', 'content': 0.31499889492988586, 'timestamp': '2025-10-01 04:26:52.245119', 'step': 7703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:52.298110', 'step': 7703, 'epoch': 2} {'type': 'loss', 'content': 0.18743909895420074, 'timestamp': '2025-10-01 04:26:52.305618', 'step': 7704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:52.359003', 'step': 7704, 'epoch': 2} {'type': 'loss', 'content': 0.2025710940361023, 'timestamp': '2025-10-01 04:26:52.361597', 'step': 7705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:52.414890', 'step': 7705, 'epoch': 2} {'type': 'loss', 'content': 0.2064308226108551, 'timestamp': '2025-10-01 04:26:52.416762', 'step': 7706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:52.469494', 'step': 7706, 'epoch': 2} {'type': 'loss', 'content': 0.08848187327384949, 'timestamp': '2025-10-01 04:26:52.471603', 'step': 7707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:52.525164', 'step': 7707, 'epoch': 2} {'type': 'loss', 'content': 0.10030166804790497, 'timestamp': '2025-10-01 04:26:52.530795', 'step': 7708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:52.583659', 'step': 7708, 'epoch': 2} {'type': 'loss', 'content': 0.1413894146680832, 'timestamp': '2025-10-01 04:26:52.585666', 'step': 7709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:52.638317', 'step': 7709, 'epoch': 2} {'type': 'loss', 'content': 0.1577264815568924, 'timestamp': '2025-10-01 04:26:52.640498', 'step': 7710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:52.694127', 'step': 7710, 'epoch': 2} {'type': 'loss', 'content': 0.10847640037536621, 'timestamp': '2025-10-01 04:26:52.696358', 'step': 7711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:52.749294', 'step': 7711, 'epoch': 2} {'type': 'loss', 'content': 0.17692504823207855, 'timestamp': '2025-10-01 04:26:52.755018', 'step': 7712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:52.807469', 'step': 7712, 'epoch': 2} {'type': 'loss', 'content': 0.20331993699073792, 'timestamp': '2025-10-01 04:26:52.809588', 'step': 7713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:52.863239', 'step': 7713, 'epoch': 2} {'type': 'loss', 'content': 0.12292568385601044, 'timestamp': '2025-10-01 04:26:52.865115', 'step': 7714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:52.918180', 'step': 7714, 'epoch': 2} {'type': 'loss', 'content': 0.11283537745475769, 'timestamp': '2025-10-01 04:26:52.920266', 'step': 7715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:52.973539', 'step': 7715, 'epoch': 2} {'type': 'loss', 'content': 0.08612735569477081, 'timestamp': '2025-10-01 04:26:52.979544', 'step': 7716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:53.032147', 'step': 7716, 'epoch': 2} {'type': 'loss', 'content': 0.18760374188423157, 'timestamp': '2025-10-01 04:26:53.037124', 'step': 7717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:53.093958', 'step': 7717, 'epoch': 2} {'type': 'loss', 'content': 0.11976191401481628, 'timestamp': '2025-10-01 04:26:53.096255', 'step': 7718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:53.150029', 'step': 7718, 'epoch': 2} {'type': 'loss', 'content': 0.10538985580205917, 'timestamp': '2025-10-01 04:26:53.152010', 'step': 7719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:53.205394', 'step': 7719, 'epoch': 2} {'type': 'loss', 'content': 0.21164371073246002, 'timestamp': '2025-10-01 04:26:53.211077', 'step': 7720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:53.264612', 'step': 7720, 'epoch': 2} {'type': 'loss', 'content': 0.1269797533750534, 'timestamp': '2025-10-01 04:26:53.266779', 'step': 7721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:53.320034', 'step': 7721, 'epoch': 2} {'type': 'loss', 'content': 0.08497770130634308, 'timestamp': '2025-10-01 04:26:53.322249', 'step': 7722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:53.375698', 'step': 7722, 'epoch': 2} {'type': 'loss', 'content': 0.1444520801305771, 'timestamp': '2025-10-01 04:26:53.383610', 'step': 7723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-01 04:26:53.437325', 'step': 7723, 'epoch': 2} {'type': 'loss', 'content': 0.1816055029630661, 'timestamp': '2025-10-01 04:26:53.443112', 'step': 7724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:53.496115', 'step': 7724, 'epoch': 2} {'type': 'loss', 'content': 0.15701532363891602, 'timestamp': '2025-10-01 04:26:53.516680', 'step': 7725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:53.570793', 'step': 7725, 'epoch': 2} {'type': 'loss', 'content': 0.08526042848825455, 'timestamp': '2025-10-01 04:26:53.574300', 'step': 7726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:53.627714', 'step': 7726, 'epoch': 2} {'type': 'loss', 'content': 0.10056263208389282, 'timestamp': '2025-10-01 04:26:53.633407', 'step': 7727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:53.686569', 'step': 7727, 'epoch': 2} {'type': 'loss', 'content': 0.21662554144859314, 'timestamp': '2025-10-01 04:26:53.692634', 'step': 7728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:53.747950', 'step': 7728, 'epoch': 2} {'type': 'loss', 'content': 0.13328364491462708, 'timestamp': '2025-10-01 04:26:53.750101', 'step': 7729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:53.803125', 'step': 7729, 'epoch': 2} {'type': 'loss', 'content': 0.09029385447502136, 'timestamp': '2025-10-01 04:26:53.805072', 'step': 7730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:53.859887', 'step': 7730, 'epoch': 2} {'type': 'loss', 'content': 0.22328819334506989, 'timestamp': '2025-10-01 04:26:53.861943', 'step': 7731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:53.915021', 'step': 7731, 'epoch': 2} {'type': 'loss', 'content': 0.19541089236736298, 'timestamp': '2025-10-01 04:26:53.920774', 'step': 7732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:53.973081', 'step': 7732, 'epoch': 2} {'type': 'loss', 'content': 0.06385492533445358, 'timestamp': '2025-10-01 04:26:53.975354', 'step': 7733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:54.028788', 'step': 7733, 'epoch': 2} {'type': 'loss', 'content': 0.13054068386554718, 'timestamp': '2025-10-01 04:26:54.031117', 'step': 7734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:54.084083', 'step': 7734, 'epoch': 2} {'type': 'loss', 'content': 0.11851497739553452, 'timestamp': '2025-10-01 04:26:54.086076', 'step': 7735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:54.138976', 'step': 7735, 'epoch': 2} {'type': 'loss', 'content': 0.0768064484000206, 'timestamp': '2025-10-01 04:26:54.144707', 'step': 7736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:54.197542', 'step': 7736, 'epoch': 2} {'type': 'loss', 'content': 0.13880379498004913, 'timestamp': '2025-10-01 04:26:54.199779', 'step': 7737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:54.252991', 'step': 7737, 'epoch': 2} {'type': 'loss', 'content': 0.15326906740665436, 'timestamp': '2025-10-01 04:26:54.254891', 'step': 7738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:54.310224', 'step': 7738, 'epoch': 2} {'type': 'loss', 'content': 0.15725177526474, 'timestamp': '2025-10-01 04:26:54.312291', 'step': 7739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:54.365666', 'step': 7739, 'epoch': 2} {'type': 'loss', 'content': 0.1969587802886963, 'timestamp': '2025-10-01 04:26:54.371553', 'step': 7740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:54.424990', 'step': 7740, 'epoch': 2} {'type': 'loss', 'content': 0.06287483870983124, 'timestamp': '2025-10-01 04:26:54.427460', 'step': 7741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:54.493219', 'step': 7741, 'epoch': 2} {'type': 'loss', 'content': 0.12725961208343506, 'timestamp': '2025-10-01 04:26:54.495283', 'step': 7742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:26:54.548545', 'step': 7742, 'epoch': 2} {'type': 'loss', 'content': 0.10502426326274872, 'timestamp': '2025-10-01 04:26:54.551125', 'step': 7743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:26:54.605520', 'step': 7743, 'epoch': 2} {'type': 'loss', 'content': 0.07965505868196487, 'timestamp': '2025-10-01 04:26:54.611153', 'step': 7744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:26:54.664477', 'step': 7744, 'epoch': 2} {'type': 'loss', 'content': 0.1568809151649475, 'timestamp': '2025-10-01 04:26:54.666558', 'step': 7745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:54.729678', 'step': 7745, 'epoch': 2} {'type': 'loss', 'content': 0.22710579633712769, 'timestamp': '2025-10-01 04:26:54.731936', 'step': 7746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:54.786450', 'step': 7746, 'epoch': 2} {'type': 'loss', 'content': 0.23259006440639496, 'timestamp': '2025-10-01 04:26:54.789214', 'step': 7747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:54.846697', 'step': 7747, 'epoch': 2} {'type': 'loss', 'content': 0.27993065118789673, 'timestamp': '2025-10-01 04:26:54.853110', 'step': 7748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:54.909057', 'step': 7748, 'epoch': 2} {'type': 'loss', 'content': 0.1912536323070526, 'timestamp': '2025-10-01 04:26:54.911239', 'step': 7749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:26:54.964550', 'step': 7749, 'epoch': 2} {'type': 'loss', 'content': 0.13184086978435516, 'timestamp': '2025-10-01 04:26:54.966548', 'step': 7750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:26:55.019806', 'step': 7750, 'epoch': 2} {'type': 'loss', 'content': 0.10179095715284348, 'timestamp': '2025-10-01 04:26:55.021934', 'step': 7751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:55.075268', 'step': 7751, 'epoch': 2} {'type': 'loss', 'content': 0.21102400124073029, 'timestamp': '2025-10-01 04:26:55.081503', 'step': 7752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:26:55.136176', 'step': 7752, 'epoch': 2} {'type': 'loss', 'content': 0.2573900520801544, 'timestamp': '2025-10-01 04:26:55.139160', 'step': 7753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:55.192085', 'step': 7753, 'epoch': 2} {'type': 'loss', 'content': 0.2017437070608139, 'timestamp': '2025-10-01 04:26:55.195002', 'step': 7754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:55.250251', 'step': 7754, 'epoch': 2} {'type': 'loss', 'content': 0.14867521822452545, 'timestamp': '2025-10-01 04:26:55.253039', 'step': 7755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:55.306470', 'step': 7755, 'epoch': 2} {'type': 'loss', 'content': 0.10849827527999878, 'timestamp': '2025-10-01 04:26:55.312048', 'step': 7756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:55.365012', 'step': 7756, 'epoch': 2} {'type': 'loss', 'content': 0.14804008603096008, 'timestamp': '2025-10-01 04:26:55.367550', 'step': 7757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:55.421820', 'step': 7757, 'epoch': 2} {'type': 'loss', 'content': 0.12432904541492462, 'timestamp': '2025-10-01 04:26:55.424911', 'step': 7758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:55.477957', 'step': 7758, 'epoch': 2} {'type': 'loss', 'content': 0.16739575564861298, 'timestamp': '2025-10-01 04:26:55.480559', 'step': 7759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:55.535804', 'step': 7759, 'epoch': 2} {'type': 'loss', 'content': 0.16405323147773743, 'timestamp': '2025-10-01 04:26:55.541911', 'step': 7760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:55.594930', 'step': 7760, 'epoch': 2} {'type': 'loss', 'content': 0.21295297145843506, 'timestamp': '2025-10-01 04:26:55.597121', 'step': 7761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:55.651249', 'step': 7761, 'epoch': 2} {'type': 'loss', 'content': 0.17915034294128418, 'timestamp': '2025-10-01 04:26:55.653562', 'step': 7762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:55.707232', 'step': 7762, 'epoch': 2} {'type': 'loss', 'content': 0.1467297077178955, 'timestamp': '2025-10-01 04:26:55.709252', 'step': 7763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:55.762178', 'step': 7763, 'epoch': 2} {'type': 'loss', 'content': 0.13408194482326508, 'timestamp': '2025-10-01 04:26:55.767724', 'step': 7764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-01 04:26:55.835477', 'step': 7764, 'epoch': 2} {'type': 'loss', 'content': 0.14935055375099182, 'timestamp': '2025-10-01 04:26:55.849086', 'step': 7765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:55.916633', 'step': 7765, 'epoch': 2} {'type': 'loss', 'content': 0.21173954010009766, 'timestamp': '2025-10-01 04:26:55.918517', 'step': 7766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:55.972011', 'step': 7766, 'epoch': 2} {'type': 'loss', 'content': 0.1722051203250885, 'timestamp': '2025-10-01 04:26:55.974723', 'step': 7767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:56.028911', 'step': 7767, 'epoch': 2} {'type': 'loss', 'content': 0.10663139075040817, 'timestamp': '2025-10-01 04:26:56.035997', 'step': 7768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:56.089087', 'step': 7768, 'epoch': 2} {'type': 'loss', 'content': 0.11650846153497696, 'timestamp': '2025-10-01 04:26:56.091421', 'step': 7769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:26:56.145646', 'step': 7769, 'epoch': 2} {'type': 'loss', 'content': 0.17437678575515747, 'timestamp': '2025-10-01 04:26:56.147869', 'step': 7770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:56.204731', 'step': 7770, 'epoch': 2} {'type': 'loss', 'content': 0.18100619316101074, 'timestamp': '2025-10-01 04:26:56.207379', 'step': 7771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:56.262025', 'step': 7771, 'epoch': 2} {'type': 'loss', 'content': 0.10993334650993347, 'timestamp': '2025-10-01 04:26:56.267934', 'step': 7772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:56.323818', 'step': 7772, 'epoch': 2} {'type': 'loss', 'content': 0.0683046355843544, 'timestamp': '2025-10-01 04:26:56.326109', 'step': 7773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:56.380993', 'step': 7773, 'epoch': 2} {'type': 'loss', 'content': 0.13615679740905762, 'timestamp': '2025-10-01 04:26:56.383260', 'step': 7774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:56.439181', 'step': 7774, 'epoch': 2} {'type': 'loss', 'content': 0.11720990389585495, 'timestamp': '2025-10-01 04:26:56.441862', 'step': 7775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:56.497717', 'step': 7775, 'epoch': 2} {'type': 'loss', 'content': 0.09230651706457138, 'timestamp': '2025-10-01 04:26:56.504522', 'step': 7776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:56.558744', 'step': 7776, 'epoch': 2} {'type': 'loss', 'content': 0.10055436193943024, 'timestamp': '2025-10-01 04:26:56.560853', 'step': 7777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:56.614978', 'step': 7777, 'epoch': 2} {'type': 'loss', 'content': 0.10169436782598495, 'timestamp': '2025-10-01 04:26:56.617327', 'step': 7778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:56.671010', 'step': 7778, 'epoch': 2} {'type': 'loss', 'content': 0.1820807307958603, 'timestamp': '2025-10-01 04:26:56.673613', 'step': 7779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:56.727853', 'step': 7779, 'epoch': 2} {'type': 'loss', 'content': 0.1734393686056137, 'timestamp': '2025-10-01 04:26:56.733603', 'step': 7780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:56.788757', 'step': 7780, 'epoch': 2} {'type': 'loss', 'content': 0.2284790575504303, 'timestamp': '2025-10-01 04:26:56.790987', 'step': 7781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:56.845244', 'step': 7781, 'epoch': 2} {'type': 'loss', 'content': 0.14180336892604828, 'timestamp': '2025-10-01 04:26:56.847524', 'step': 7782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:56.902610', 'step': 7782, 'epoch': 2} {'type': 'loss', 'content': 0.1040971651673317, 'timestamp': '2025-10-01 04:26:56.905889', 'step': 7783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:56.960125', 'step': 7783, 'epoch': 2} {'type': 'loss', 'content': 0.1057036742568016, 'timestamp': '2025-10-01 04:26:56.966331', 'step': 7784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:26:57.021540', 'step': 7784, 'epoch': 2} {'type': 'loss', 'content': 0.11257539689540863, 'timestamp': '2025-10-01 04:26:57.024035', 'step': 7785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:57.078396', 'step': 7785, 'epoch': 2} {'type': 'loss', 'content': 0.184096559882164, 'timestamp': '2025-10-01 04:26:57.081140', 'step': 7786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:57.137875', 'step': 7786, 'epoch': 2} {'type': 'loss', 'content': 0.11945612728595734, 'timestamp': '2025-10-01 04:26:57.140394', 'step': 7787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:57.211210', 'step': 7787, 'epoch': 2} {'type': 'loss', 'content': 0.17815762758255005, 'timestamp': '2025-10-01 04:26:57.217537', 'step': 7788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:57.272694', 'step': 7788, 'epoch': 2} {'type': 'loss', 'content': 0.1945769190788269, 'timestamp': '2025-10-01 04:26:57.275138', 'step': 7789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:57.332308', 'step': 7789, 'epoch': 2} {'type': 'loss', 'content': 0.09232690185308456, 'timestamp': '2025-10-01 04:26:57.334646', 'step': 7790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:57.389880', 'step': 7790, 'epoch': 2} {'type': 'loss', 'content': 0.1754065454006195, 'timestamp': '2025-10-01 04:26:57.392492', 'step': 7791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:57.457718', 'step': 7791, 'epoch': 2} {'type': 'loss', 'content': 0.19401048123836517, 'timestamp': '2025-10-01 04:26:57.463959', 'step': 7792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:57.518946', 'step': 7792, 'epoch': 2} {'type': 'loss', 'content': 0.09994731843471527, 'timestamp': '2025-10-01 04:26:57.521893', 'step': 7793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:57.576900', 'step': 7793, 'epoch': 2} {'type': 'loss', 'content': 0.1784554123878479, 'timestamp': '2025-10-01 04:26:57.579172', 'step': 7794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:57.634312', 'step': 7794, 'epoch': 2} {'type': 'loss', 'content': 0.0823870450258255, 'timestamp': '2025-10-01 04:26:57.636968', 'step': 7795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:57.691658', 'step': 7795, 'epoch': 2} {'type': 'loss', 'content': 0.14409193396568298, 'timestamp': '2025-10-01 04:26:57.704171', 'step': 7796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:57.763934', 'step': 7796, 'epoch': 2} {'type': 'loss', 'content': 0.17824192345142365, 'timestamp': '2025-10-01 04:26:57.766050', 'step': 7797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:57.826361', 'step': 7797, 'epoch': 2} {'type': 'loss', 'content': 0.10998157411813736, 'timestamp': '2025-10-01 04:26:57.829419', 'step': 7798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:57.885735', 'step': 7798, 'epoch': 2} {'type': 'loss', 'content': 0.16449452936649323, 'timestamp': '2025-10-01 04:26:57.888013', 'step': 7799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:57.947572', 'step': 7799, 'epoch': 2} {'type': 'loss', 'content': 0.14306366443634033, 'timestamp': '2025-10-01 04:26:57.953654', 'step': 7800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:58.014474', 'step': 7800, 'epoch': 2} {'type': 'loss', 'content': 0.1437174379825592, 'timestamp': '2025-10-01 04:26:58.017693', 'step': 7801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:58.072304', 'step': 7801, 'epoch': 2} {'type': 'loss', 'content': 0.15144407749176025, 'timestamp': '2025-10-01 04:26:58.074619', 'step': 7802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:58.138366', 'step': 7802, 'epoch': 2} {'type': 'loss', 'content': 0.14279218018054962, 'timestamp': '2025-10-01 04:26:58.140591', 'step': 7803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:58.194725', 'step': 7803, 'epoch': 2} {'type': 'loss', 'content': 0.14189353585243225, 'timestamp': '2025-10-01 04:26:58.200960', 'step': 7804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:58.257324', 'step': 7804, 'epoch': 2} {'type': 'loss', 'content': 0.20459172129631042, 'timestamp': '2025-10-01 04:26:58.259699', 'step': 7805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:58.325730', 'step': 7805, 'epoch': 2} {'type': 'loss', 'content': 0.2217872142791748, 'timestamp': '2025-10-01 04:26:58.331327', 'step': 7806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:58.388438', 'step': 7806, 'epoch': 2} {'type': 'loss', 'content': 0.22820353507995605, 'timestamp': '2025-10-01 04:26:58.390632', 'step': 7807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:58.445257', 'step': 7807, 'epoch': 2} {'type': 'loss', 'content': 0.12520110607147217, 'timestamp': '2025-10-01 04:26:58.451027', 'step': 7808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:58.504970', 'step': 7808, 'epoch': 2} {'type': 'loss', 'content': 0.1847522109746933, 'timestamp': '2025-10-01 04:26:58.507294', 'step': 7809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:58.561912', 'step': 7809, 'epoch': 2} {'type': 'loss', 'content': 0.08641096204519272, 'timestamp': '2025-10-01 04:26:58.568440', 'step': 7810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:58.628098', 'step': 7810, 'epoch': 2} {'type': 'loss', 'content': 0.1349041610956192, 'timestamp': '2025-10-01 04:26:58.640018', 'step': 7811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:58.702090', 'step': 7811, 'epoch': 2} {'type': 'loss', 'content': 0.14502766728401184, 'timestamp': '2025-10-01 04:26:58.709383', 'step': 7812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:58.771038', 'step': 7812, 'epoch': 2} {'type': 'loss', 'content': 0.0827380046248436, 'timestamp': '2025-10-01 04:26:58.773331', 'step': 7813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:58.831223', 'step': 7813, 'epoch': 2} {'type': 'loss', 'content': 0.08415528386831284, 'timestamp': '2025-10-01 04:26:58.833505', 'step': 7814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:58.892739', 'step': 7814, 'epoch': 2} {'type': 'loss', 'content': 0.06553886085748672, 'timestamp': '2025-10-01 04:26:58.897202', 'step': 7815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:58.956840', 'step': 7815, 'epoch': 2} {'type': 'loss', 'content': 0.27118539810180664, 'timestamp': '2025-10-01 04:26:58.977507', 'step': 7816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:59.034799', 'step': 7816, 'epoch': 2} {'type': 'loss', 'content': 0.1549835503101349, 'timestamp': '2025-10-01 04:26:59.036857', 'step': 7817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:59.093110', 'step': 7817, 'epoch': 2} {'type': 'loss', 'content': 0.05633537843823433, 'timestamp': '2025-10-01 04:26:59.095121', 'step': 7818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:59.160718', 'step': 7818, 'epoch': 2} {'type': 'loss', 'content': 0.15110164880752563, 'timestamp': '2025-10-01 04:26:59.164504', 'step': 7819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:59.218855', 'step': 7819, 'epoch': 2} {'type': 'loss', 'content': 0.170933797955513, 'timestamp': '2025-10-01 04:26:59.225002', 'step': 7820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:59.277992', 'step': 7820, 'epoch': 2} {'type': 'loss', 'content': 0.10041235387325287, 'timestamp': '2025-10-01 04:26:59.279963', 'step': 7821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:59.334570', 'step': 7821, 'epoch': 2} {'type': 'loss', 'content': 0.12578855454921722, 'timestamp': '2025-10-01 04:26:59.337101', 'step': 7822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:26:59.391559', 'step': 7822, 'epoch': 2} {'type': 'loss', 'content': 0.07511299103498459, 'timestamp': '2025-10-01 04:26:59.394007', 'step': 7823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:59.446997', 'step': 7823, 'epoch': 2} {'type': 'loss', 'content': 0.10862191021442413, 'timestamp': '2025-10-01 04:26:59.453140', 'step': 7824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:26:59.506579', 'step': 7824, 'epoch': 2} {'type': 'loss', 'content': 0.23572319746017456, 'timestamp': '2025-10-01 04:26:59.509196', 'step': 7825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:59.563372', 'step': 7825, 'epoch': 2} {'type': 'loss', 'content': 0.1441720724105835, 'timestamp': '2025-10-01 04:26:59.566421', 'step': 7826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:26:59.622561', 'step': 7826, 'epoch': 2} {'type': 'loss', 'content': 0.29663577675819397, 'timestamp': '2025-10-01 04:26:59.624831', 'step': 7827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:59.691729', 'step': 7827, 'epoch': 2} {'type': 'loss', 'content': 0.10547570884227753, 'timestamp': '2025-10-01 04:26:59.697670', 'step': 7828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:59.750265', 'step': 7828, 'epoch': 2} {'type': 'loss', 'content': 0.179413840174675, 'timestamp': '2025-10-01 04:26:59.752033', 'step': 7829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:26:59.806778', 'step': 7829, 'epoch': 2} {'type': 'loss', 'content': 0.0719032809138298, 'timestamp': '2025-10-01 04:26:59.809435', 'step': 7830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:59.863178', 'step': 7830, 'epoch': 2} {'type': 'loss', 'content': 0.16161225736141205, 'timestamp': '2025-10-01 04:26:59.865137', 'step': 7831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:26:59.918877', 'step': 7831, 'epoch': 2} {'type': 'loss', 'content': 0.20672720670700073, 'timestamp': '2025-10-01 04:26:59.925150', 'step': 7832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:26:59.979780', 'step': 7832, 'epoch': 2} {'type': 'loss', 'content': 0.1185324564576149, 'timestamp': '2025-10-01 04:26:59.981999', 'step': 7833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:00.036527', 'step': 7833, 'epoch': 2} {'type': 'loss', 'content': 0.13415974378585815, 'timestamp': '2025-10-01 04:27:00.039318', 'step': 7834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:00.093006', 'step': 7834, 'epoch': 2} {'type': 'loss', 'content': 0.12553933262825012, 'timestamp': '2025-10-01 04:27:00.095328', 'step': 7835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:00.148843', 'step': 7835, 'epoch': 2} {'type': 'loss', 'content': 0.10416162014007568, 'timestamp': '2025-10-01 04:27:00.155034', 'step': 7836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:00.209033', 'step': 7836, 'epoch': 2} {'type': 'loss', 'content': 0.18543502688407898, 'timestamp': '2025-10-01 04:27:00.210874', 'step': 7837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:00.264621', 'step': 7837, 'epoch': 2} {'type': 'loss', 'content': 0.11264382302761078, 'timestamp': '2025-10-01 04:27:00.266972', 'step': 7838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:00.339122', 'step': 7838, 'epoch': 2} {'type': 'loss', 'content': 0.14282894134521484, 'timestamp': '2025-10-01 04:27:00.341228', 'step': 7839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:00.395465', 'step': 7839, 'epoch': 2} {'type': 'loss', 'content': 0.2512615919113159, 'timestamp': '2025-10-01 04:27:00.401825', 'step': 7840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:00.455486', 'step': 7840, 'epoch': 2} {'type': 'loss', 'content': 0.1725892871618271, 'timestamp': '2025-10-01 04:27:00.459119', 'step': 7841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:00.528252', 'step': 7841, 'epoch': 2} {'type': 'loss', 'content': 0.2755638659000397, 'timestamp': '2025-10-01 04:27:00.530393', 'step': 7842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:00.586230', 'step': 7842, 'epoch': 2} {'type': 'loss', 'content': 0.07814041525125504, 'timestamp': '2025-10-01 04:27:00.588261', 'step': 7843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:00.643236', 'step': 7843, 'epoch': 2} {'type': 'loss', 'content': 0.1397959291934967, 'timestamp': '2025-10-01 04:27:00.648944', 'step': 7844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:00.703160', 'step': 7844, 'epoch': 2} {'type': 'loss', 'content': 0.1499822437763214, 'timestamp': '2025-10-01 04:27:00.705252', 'step': 7845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:00.758643', 'step': 7845, 'epoch': 2} {'type': 'loss', 'content': 0.10096612572669983, 'timestamp': '2025-10-01 04:27:00.760836', 'step': 7846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:00.827594', 'step': 7846, 'epoch': 2} {'type': 'loss', 'content': 0.06215234100818634, 'timestamp': '2025-10-01 04:27:00.829772', 'step': 7847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:27:00.883708', 'step': 7847, 'epoch': 2} {'type': 'loss', 'content': 0.21306709945201874, 'timestamp': '2025-10-01 04:27:00.889807', 'step': 7848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:00.953756', 'step': 7848, 'epoch': 2} {'type': 'loss', 'content': 0.1638907790184021, 'timestamp': '2025-10-01 04:27:00.955668', 'step': 7849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:27:01.008199', 'step': 7849, 'epoch': 2} {'type': 'loss', 'content': 0.1264667510986328, 'timestamp': '2025-10-01 04:27:01.010542', 'step': 7850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:01.076587', 'step': 7850, 'epoch': 2} {'type': 'loss', 'content': 0.16683630645275116, 'timestamp': '2025-10-01 04:27:01.078694', 'step': 7851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:01.131433', 'step': 7851, 'epoch': 2} {'type': 'loss', 'content': 0.1684921234846115, 'timestamp': '2025-10-01 04:27:01.138023', 'step': 7852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:01.191599', 'step': 7852, 'epoch': 2} {'type': 'loss', 'content': 0.1205853745341301, 'timestamp': '2025-10-01 04:27:01.193859', 'step': 7853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:01.247169', 'step': 7853, 'epoch': 2} {'type': 'loss', 'content': 0.09703748673200607, 'timestamp': '2025-10-01 04:27:01.249182', 'step': 7854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:01.302567', 'step': 7854, 'epoch': 2} {'type': 'loss', 'content': 0.21127867698669434, 'timestamp': '2025-10-01 04:27:01.304668', 'step': 7855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:01.358700', 'step': 7855, 'epoch': 2} {'type': 'loss', 'content': 0.10235098749399185, 'timestamp': '2025-10-01 04:27:01.365648', 'step': 7856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:01.418478', 'step': 7856, 'epoch': 2} {'type': 'loss', 'content': 0.10559212416410446, 'timestamp': '2025-10-01 04:27:01.420928', 'step': 7857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:01.474940', 'step': 7857, 'epoch': 2} {'type': 'loss', 'content': 0.2729276418685913, 'timestamp': '2025-10-01 04:27:01.477066', 'step': 7858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:01.538165', 'step': 7858, 'epoch': 2} {'type': 'loss', 'content': 0.15780802071094513, 'timestamp': '2025-10-01 04:27:01.540234', 'step': 7859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:01.593318', 'step': 7859, 'epoch': 2} {'type': 'loss', 'content': 0.12393910437822342, 'timestamp': '2025-10-01 04:27:01.606617', 'step': 7860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:27:01.688028', 'step': 7860, 'epoch': 2} {'type': 'loss', 'content': 0.1941126137971878, 'timestamp': '2025-10-01 04:27:01.690012', 'step': 7861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:01.743521', 'step': 7861, 'epoch': 2} {'type': 'loss', 'content': 0.13366717100143433, 'timestamp': '2025-10-01 04:27:01.745583', 'step': 7862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:01.799575', 'step': 7862, 'epoch': 2} {'type': 'loss', 'content': 0.1499694287776947, 'timestamp': '2025-10-01 04:27:01.802833', 'step': 7863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:01.855823', 'step': 7863, 'epoch': 2} {'type': 'loss', 'content': 0.09817605465650558, 'timestamp': '2025-10-01 04:27:01.861581', 'step': 7864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:01.914471', 'step': 7864, 'epoch': 2} {'type': 'loss', 'content': 0.17093339562416077, 'timestamp': '2025-10-01 04:27:01.916310', 'step': 7865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:01.969529', 'step': 7865, 'epoch': 2} {'type': 'loss', 'content': 0.10471837222576141, 'timestamp': '2025-10-01 04:27:01.971386', 'step': 7866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:02.026623', 'step': 7866, 'epoch': 2} {'type': 'loss', 'content': 0.08432845026254654, 'timestamp': '2025-10-01 04:27:02.028540', 'step': 7867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:27:02.082605', 'step': 7867, 'epoch': 2} {'type': 'loss', 'content': 0.1320754438638687, 'timestamp': '2025-10-01 04:27:02.097413', 'step': 7868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:27:02.151686', 'step': 7868, 'epoch': 2} {'type': 'loss', 'content': 0.14179280400276184, 'timestamp': '2025-10-01 04:27:02.153719', 'step': 7869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:02.208680', 'step': 7869, 'epoch': 2} {'type': 'loss', 'content': 0.17026585340499878, 'timestamp': '2025-10-01 04:27:02.210913', 'step': 7870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:02.264109', 'step': 7870, 'epoch': 2} {'type': 'loss', 'content': 0.2266772836446762, 'timestamp': '2025-10-01 04:27:02.266372', 'step': 7871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:02.320054', 'step': 7871, 'epoch': 2} {'type': 'loss', 'content': 0.14467276632785797, 'timestamp': '2025-10-01 04:27:02.325880', 'step': 7872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:02.379230', 'step': 7872, 'epoch': 2} {'type': 'loss', 'content': 0.14107738435268402, 'timestamp': '2025-10-01 04:27:02.381216', 'step': 7873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:02.439617', 'step': 7873, 'epoch': 2} {'type': 'loss', 'content': 0.12331218272447586, 'timestamp': '2025-10-01 04:27:02.441808', 'step': 7874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:02.495436', 'step': 7874, 'epoch': 2} {'type': 'loss', 'content': 0.10529699176549911, 'timestamp': '2025-10-01 04:27:02.497692', 'step': 7875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:02.551932', 'step': 7875, 'epoch': 2} {'type': 'loss', 'content': 0.1013086810708046, 'timestamp': '2025-10-01 04:27:02.557707', 'step': 7876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:02.611710', 'step': 7876, 'epoch': 2} {'type': 'loss', 'content': 0.11122255027294159, 'timestamp': '2025-10-01 04:27:02.613667', 'step': 7877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:02.667752', 'step': 7877, 'epoch': 2} {'type': 'loss', 'content': 0.11594373732805252, 'timestamp': '2025-10-01 04:27:02.669841', 'step': 7878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:02.723308', 'step': 7878, 'epoch': 2} {'type': 'loss', 'content': 0.09793581813573837, 'timestamp': '2025-10-01 04:27:02.725189', 'step': 7879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:02.786748', 'step': 7879, 'epoch': 2} {'type': 'loss', 'content': 0.14021629095077515, 'timestamp': '2025-10-01 04:27:02.792432', 'step': 7880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:02.844920', 'step': 7880, 'epoch': 2} {'type': 'loss', 'content': 0.11355742812156677, 'timestamp': '2025-10-01 04:27:02.846859', 'step': 7881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:02.899960', 'step': 7881, 'epoch': 2} {'type': 'loss', 'content': 0.17438793182373047, 'timestamp': '2025-10-01 04:27:02.902483', 'step': 7882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:02.956480', 'step': 7882, 'epoch': 2} {'type': 'loss', 'content': 0.12801559269428253, 'timestamp': '2025-10-01 04:27:02.968596', 'step': 7883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:03.025121', 'step': 7883, 'epoch': 2} {'type': 'loss', 'content': 0.12845046818256378, 'timestamp': '2025-10-01 04:27:03.032943', 'step': 7884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:03.104487', 'step': 7884, 'epoch': 2} {'type': 'loss', 'content': 0.09941843152046204, 'timestamp': '2025-10-01 04:27:03.106797', 'step': 7885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:03.160209', 'step': 7885, 'epoch': 2} {'type': 'loss', 'content': 0.15380865335464478, 'timestamp': '2025-10-01 04:27:03.163709', 'step': 7886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:03.223039', 'step': 7886, 'epoch': 2} {'type': 'loss', 'content': 0.24130168557167053, 'timestamp': '2025-10-01 04:27:03.228962', 'step': 7887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:03.282378', 'step': 7887, 'epoch': 2} {'type': 'loss', 'content': 0.20474690198898315, 'timestamp': '2025-10-01 04:27:03.292897', 'step': 7888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:03.346834', 'step': 7888, 'epoch': 2} {'type': 'loss', 'content': 0.13751719892024994, 'timestamp': '2025-10-01 04:27:03.348840', 'step': 7889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:03.402732', 'step': 7889, 'epoch': 2} {'type': 'loss', 'content': 0.05462193861603737, 'timestamp': '2025-10-01 04:27:03.404937', 'step': 7890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:27:03.467590', 'step': 7890, 'epoch': 2} {'type': 'loss', 'content': 0.11837702989578247, 'timestamp': '2025-10-01 04:27:03.469626', 'step': 7891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:03.523871', 'step': 7891, 'epoch': 2} {'type': 'loss', 'content': 0.16257405281066895, 'timestamp': '2025-10-01 04:27:03.529901', 'step': 7892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:03.597154', 'step': 7892, 'epoch': 2} {'type': 'loss', 'content': 0.10850565135478973, 'timestamp': '2025-10-01 04:27:03.607444', 'step': 7893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:03.660849', 'step': 7893, 'epoch': 2} {'type': 'loss', 'content': 0.07786701619625092, 'timestamp': '2025-10-01 04:27:03.665494', 'step': 7894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:03.721452', 'step': 7894, 'epoch': 2} {'type': 'loss', 'content': 0.23898349702358246, 'timestamp': '2025-10-01 04:27:03.726587', 'step': 7895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:03.780856', 'step': 7895, 'epoch': 2} {'type': 'loss', 'content': 0.11942027509212494, 'timestamp': '2025-10-01 04:27:03.789290', 'step': 7896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:03.843440', 'step': 7896, 'epoch': 2} {'type': 'loss', 'content': 0.18897606432437897, 'timestamp': '2025-10-01 04:27:03.845821', 'step': 7897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:03.902936', 'step': 7897, 'epoch': 2} {'type': 'loss', 'content': 0.15939344465732574, 'timestamp': '2025-10-01 04:27:03.907785', 'step': 7898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:03.961965', 'step': 7898, 'epoch': 2} {'type': 'loss', 'content': 0.07702402770519257, 'timestamp': '2025-10-01 04:27:03.973115', 'step': 7899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:04.038871', 'step': 7899, 'epoch': 2} {'type': 'loss', 'content': 0.2289862185716629, 'timestamp': '2025-10-01 04:27:04.045053', 'step': 7900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:04.098255', 'step': 7900, 'epoch': 2} {'type': 'loss', 'content': 0.10613071173429489, 'timestamp': '2025-10-01 04:27:04.101021', 'step': 7901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:04.154005', 'step': 7901, 'epoch': 2} {'type': 'loss', 'content': 0.08957052230834961, 'timestamp': '2025-10-01 04:27:04.156371', 'step': 7902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:04.209543', 'step': 7902, 'epoch': 2} {'type': 'loss', 'content': 0.15818250179290771, 'timestamp': '2025-10-01 04:27:04.211616', 'step': 7903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:04.265344', 'step': 7903, 'epoch': 2} {'type': 'loss', 'content': 0.19077670574188232, 'timestamp': '2025-10-01 04:27:04.271156', 'step': 7904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:04.338009', 'step': 7904, 'epoch': 2} {'type': 'loss', 'content': 0.17858847975730896, 'timestamp': '2025-10-01 04:27:04.344085', 'step': 7905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:04.399781', 'step': 7905, 'epoch': 2} {'type': 'loss', 'content': 0.1480238139629364, 'timestamp': '2025-10-01 04:27:04.401733', 'step': 7906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:27:04.468252', 'step': 7906, 'epoch': 2} {'type': 'loss', 'content': 0.20340152084827423, 'timestamp': '2025-10-01 04:27:04.470561', 'step': 7907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:27:04.524917', 'step': 7907, 'epoch': 2} {'type': 'loss', 'content': 0.1471412628889084, 'timestamp': '2025-10-01 04:27:04.530875', 'step': 7908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:04.586361', 'step': 7908, 'epoch': 2} {'type': 'loss', 'content': 0.1650506854057312, 'timestamp': '2025-10-01 04:27:04.588568', 'step': 7909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:04.642355', 'step': 7909, 'epoch': 2} {'type': 'loss', 'content': 0.10801281034946442, 'timestamp': '2025-10-01 04:27:04.645834', 'step': 7910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:04.700671', 'step': 7910, 'epoch': 2} {'type': 'loss', 'content': 0.14333631098270416, 'timestamp': '2025-10-01 04:27:04.702788', 'step': 7911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:04.757370', 'step': 7911, 'epoch': 2} {'type': 'loss', 'content': 0.146198570728302, 'timestamp': '2025-10-01 04:27:04.763329', 'step': 7912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:04.816767', 'step': 7912, 'epoch': 2} {'type': 'loss', 'content': 0.20234757661819458, 'timestamp': '2025-10-01 04:27:04.823085', 'step': 7913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:27:04.877664', 'step': 7913, 'epoch': 2} {'type': 'loss', 'content': 0.17223034799098969, 'timestamp': '2025-10-01 04:27:04.879964', 'step': 7914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:04.933892', 'step': 7914, 'epoch': 2} {'type': 'loss', 'content': 0.17294929921627045, 'timestamp': '2025-10-01 04:27:04.936016', 'step': 7915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:04.989268', 'step': 7915, 'epoch': 2} {'type': 'loss', 'content': 0.13344913721084595, 'timestamp': '2025-10-01 04:27:04.995250', 'step': 7916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:27:05.058371', 'step': 7916, 'epoch': 2} {'type': 'loss', 'content': 0.15915818512439728, 'timestamp': '2025-10-01 04:27:05.060404', 'step': 7917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:05.114695', 'step': 7917, 'epoch': 2} {'type': 'loss', 'content': 0.11432314664125443, 'timestamp': '2025-10-01 04:27:05.117192', 'step': 7918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:05.177208', 'step': 7918, 'epoch': 2} {'type': 'loss', 'content': 0.1516445130109787, 'timestamp': '2025-10-01 04:27:05.179270', 'step': 7919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:05.232742', 'step': 7919, 'epoch': 2} {'type': 'loss', 'content': 0.209511861205101, 'timestamp': '2025-10-01 04:27:05.238356', 'step': 7920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:05.291441', 'step': 7920, 'epoch': 2} {'type': 'loss', 'content': 0.13448986411094666, 'timestamp': '2025-10-01 04:27:05.293362', 'step': 7921, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:05.366199', 'step': 7921, 'epoch': 2} {'type': 'loss', 'content': 0.17540179193019867, 'timestamp': '2025-10-01 04:27:05.368235', 'step': 7922, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-01 04:27:18.792164', 'step': 7922, 'epoch': 2} {'type': 'pplx', 'content': 12199.681168434121, 'timestamp': '2025-10-01 04:27:18.795334', 'step': 7922, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:18.851663', 'step': 7922, 'epoch': 2} {'type': 'loss', 'content': 0.10892366617918015, 'timestamp': '2025-10-01 04:27:18.854063', 'step': 7923, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:18.916961', 'step': 7923, 'epoch': 2} {'type': 'loss', 'content': 0.17883580923080444, 'timestamp': '2025-10-01 04:27:18.922884', 'step': 7924, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:18.975922', 'step': 7924, 'epoch': 2} {'type': 'loss', 'content': 0.10989422351121902, 'timestamp': '2025-10-01 04:27:18.979273', 'step': 7925, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:19.034300', 'step': 7925, 'epoch': 2} {'type': 'loss', 'content': 0.1264486312866211, 'timestamp': '2025-10-01 04:27:19.036523', 'step': 7926, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:19.092402', 'step': 7926, 'epoch': 2} {'type': 'loss', 'content': 0.10163932293653488, 'timestamp': '2025-10-01 04:27:19.094473', 'step': 7927, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:19.159543', 'step': 7927, 'epoch': 2} {'type': 'loss', 'content': 0.13883471488952637, 'timestamp': '2025-10-01 04:27:19.166199', 'step': 7928, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:19.227831', 'step': 7928, 'epoch': 2} {'type': 'loss', 'content': 0.16850928962230682, 'timestamp': '2025-10-01 04:27:19.229940', 'step': 7929, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:19.289396', 'step': 7929, 'epoch': 2} {'type': 'loss', 'content': 0.07329101115465164, 'timestamp': '2025-10-01 04:27:19.291453', 'step': 7930, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:19.345418', 'step': 7930, 'epoch': 2} {'type': 'loss', 'content': 0.23334550857543945, 'timestamp': '2025-10-01 04:27:19.347460', 'step': 7931, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:19.400731', 'step': 7931, 'epoch': 2} {'type': 'loss', 'content': 0.1580660045146942, 'timestamp': '2025-10-01 04:27:19.406480', 'step': 7932, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:19.458524', 'step': 7932, 'epoch': 2} {'type': 'loss', 'content': 0.14199857413768768, 'timestamp': '2025-10-01 04:27:19.460646', 'step': 7933, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:19.514253', 'step': 7933, 'epoch': 2} {'type': 'loss', 'content': 0.07461828738451004, 'timestamp': '2025-10-01 04:27:19.516390', 'step': 7934, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:19.568924', 'step': 7934, 'epoch': 2} {'type': 'loss', 'content': 0.22011227905750275, 'timestamp': '2025-10-01 04:27:19.570987', 'step': 7935, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:19.628534', 'step': 7935, 'epoch': 2} {'type': 'loss', 'content': 0.07352939248085022, 'timestamp': '2025-10-01 04:27:19.634350', 'step': 7936, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:19.687205', 'step': 7936, 'epoch': 2} {'type': 'loss', 'content': 0.13457252085208893, 'timestamp': '2025-10-01 04:27:19.689254', 'step': 7937, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:19.747274', 'step': 7937, 'epoch': 2} {'type': 'loss', 'content': 0.09202168136835098, 'timestamp': '2025-10-01 04:27:19.750831', 'step': 7938, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:27:19.804905', 'step': 7938, 'epoch': 2} {'type': 'loss', 'content': 0.11161216348409653, 'timestamp': '2025-10-01 04:27:19.806939', 'step': 7939, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:19.860870', 'step': 7939, 'epoch': 2} {'type': 'loss', 'content': 0.29699471592903137, 'timestamp': '2025-10-01 04:27:19.866767', 'step': 7940, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:27:19.920331', 'step': 7940, 'epoch': 2} {'type': 'loss', 'content': 0.16538427770137787, 'timestamp': '2025-10-01 04:27:19.922249', 'step': 7941, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:19.976660', 'step': 7941, 'epoch': 2} {'type': 'loss', 'content': 0.1481316238641739, 'timestamp': '2025-10-01 04:27:19.978604', 'step': 7942, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:27:20.036587', 'step': 7942, 'epoch': 2} {'type': 'loss', 'content': 0.10546429455280304, 'timestamp': '2025-10-01 04:27:20.038594', 'step': 7943, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:20.091943', 'step': 7943, 'epoch': 2} {'type': 'loss', 'content': 0.20063643157482147, 'timestamp': '2025-10-01 04:27:20.097787', 'step': 7944, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:20.150386', 'step': 7944, 'epoch': 2} {'type': 'loss', 'content': 0.09455932676792145, 'timestamp': '2025-10-01 04:27:20.152778', 'step': 7945, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:20.207046', 'step': 7945, 'epoch': 2} {'type': 'loss', 'content': 0.10571429878473282, 'timestamp': '2025-10-01 04:27:20.209264', 'step': 7946, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:20.276079', 'step': 7946, 'epoch': 2} {'type': 'loss', 'content': 0.05445297807455063, 'timestamp': '2025-10-01 04:27:20.278180', 'step': 7947, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:20.345752', 'step': 7947, 'epoch': 2} {'type': 'loss', 'content': 0.19235196709632874, 'timestamp': '2025-10-01 04:27:20.351450', 'step': 7948, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:20.408022', 'step': 7948, 'epoch': 2} {'type': 'loss', 'content': 0.16302010416984558, 'timestamp': '2025-10-01 04:27:20.410162', 'step': 7949, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:20.463049', 'step': 7949, 'epoch': 2} {'type': 'loss', 'content': 0.16061317920684814, 'timestamp': '2025-10-01 04:27:20.465887', 'step': 7950, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:20.519452', 'step': 7950, 'epoch': 2} {'type': 'loss', 'content': 0.1751127988100052, 'timestamp': '2025-10-01 04:27:20.527127', 'step': 7951, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:20.579945', 'step': 7951, 'epoch': 2} {'type': 'loss', 'content': 0.10901438444852829, 'timestamp': '2025-10-01 04:27:20.585836', 'step': 7952, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:20.638649', 'step': 7952, 'epoch': 2} {'type': 'loss', 'content': 0.09487003833055496, 'timestamp': '2025-10-01 04:27:20.641142', 'step': 7953, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:20.694422', 'step': 7953, 'epoch': 2} {'type': 'loss', 'content': 0.1886420100927353, 'timestamp': '2025-10-01 04:27:20.696669', 'step': 7954, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:20.749520', 'step': 7954, 'epoch': 2} {'type': 'loss', 'content': 0.1663653403520584, 'timestamp': '2025-10-01 04:27:20.751777', 'step': 7955, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:27:20.805460', 'step': 7955, 'epoch': 2} {'type': 'loss', 'content': 0.19028422236442566, 'timestamp': '2025-10-01 04:27:20.811145', 'step': 7956, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:27:20.863602', 'step': 7956, 'epoch': 2} {'type': 'loss', 'content': 0.16047216951847076, 'timestamp': '2025-10-01 04:27:20.865873', 'step': 7957, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:20.920494', 'step': 7957, 'epoch': 2} {'type': 'loss', 'content': 0.15637893974781036, 'timestamp': '2025-10-01 04:27:20.922609', 'step': 7958, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:20.975187', 'step': 7958, 'epoch': 2} {'type': 'loss', 'content': 0.0862322598695755, 'timestamp': '2025-10-01 04:27:20.977186', 'step': 7959, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:21.030723', 'step': 7959, 'epoch': 2} {'type': 'loss', 'content': 0.07250865548849106, 'timestamp': '2025-10-01 04:27:21.036381', 'step': 7960, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:21.090480', 'step': 7960, 'epoch': 2} {'type': 'loss', 'content': 0.05994926393032074, 'timestamp': '2025-10-01 04:27:21.092575', 'step': 7961, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:21.145496', 'step': 7961, 'epoch': 2} {'type': 'loss', 'content': 0.1744314283132553, 'timestamp': '2025-10-01 04:27:21.147591', 'step': 7962, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:21.200584', 'step': 7962, 'epoch': 2} {'type': 'loss', 'content': 0.09683576226234436, 'timestamp': '2025-10-01 04:27:21.202520', 'step': 7963, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:21.255302', 'step': 7963, 'epoch': 2} {'type': 'loss', 'content': 0.09242260456085205, 'timestamp': '2025-10-01 04:27:21.260785', 'step': 7964, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:21.320433', 'step': 7964, 'epoch': 2} {'type': 'loss', 'content': 0.1036616712808609, 'timestamp': '2025-10-01 04:27:21.322407', 'step': 7965, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:21.375581', 'step': 7965, 'epoch': 2} {'type': 'loss', 'content': 0.1687791496515274, 'timestamp': '2025-10-01 04:27:21.377884', 'step': 7966, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:21.431912', 'step': 7966, 'epoch': 2} {'type': 'loss', 'content': 0.24810221791267395, 'timestamp': '2025-10-01 04:27:21.434805', 'step': 7967, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:21.487659', 'step': 7967, 'epoch': 2} {'type': 'loss', 'content': 0.08719956129789352, 'timestamp': '2025-10-01 04:27:21.493408', 'step': 7968, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:21.546245', 'step': 7968, 'epoch': 2} {'type': 'loss', 'content': 0.13529475033283234, 'timestamp': '2025-10-01 04:27:21.548607', 'step': 7969, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:21.601589', 'step': 7969, 'epoch': 2} {'type': 'loss', 'content': 0.09324997663497925, 'timestamp': '2025-10-01 04:27:21.603651', 'step': 7970, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:27:21.657135', 'step': 7970, 'epoch': 2} {'type': 'loss', 'content': 0.06833499670028687, 'timestamp': '2025-10-01 04:27:21.659089', 'step': 7971, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:21.712672', 'step': 7971, 'epoch': 2} {'type': 'loss', 'content': 0.09396512806415558, 'timestamp': '2025-10-01 04:27:21.718117', 'step': 7972, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:21.770784', 'step': 7972, 'epoch': 2} {'type': 'loss', 'content': 0.2417472004890442, 'timestamp': '2025-10-01 04:27:21.773002', 'step': 7973, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:21.825738', 'step': 7973, 'epoch': 2} {'type': 'loss', 'content': 0.13968448340892792, 'timestamp': '2025-10-01 04:27:21.832162', 'step': 7974, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:21.889043', 'step': 7974, 'epoch': 2} {'type': 'loss', 'content': 0.13268080353736877, 'timestamp': '2025-10-01 04:27:21.892999', 'step': 7975, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:21.953658', 'step': 7975, 'epoch': 2} {'type': 'loss', 'content': 0.15264464914798737, 'timestamp': '2025-10-01 04:27:21.959134', 'step': 7976, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:22.011267', 'step': 7976, 'epoch': 2} {'type': 'loss', 'content': 0.16764216125011444, 'timestamp': '2025-10-01 04:27:22.017310', 'step': 7977, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:22.070234', 'step': 7977, 'epoch': 2} {'type': 'loss', 'content': 0.08419705927371979, 'timestamp': '2025-10-01 04:27:22.072531', 'step': 7978, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:22.131280', 'step': 7978, 'epoch': 2} {'type': 'loss', 'content': 0.18248680233955383, 'timestamp': '2025-10-01 04:27:22.133359', 'step': 7979, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:22.186837', 'step': 7979, 'epoch': 2} {'type': 'loss', 'content': 0.10020815581083298, 'timestamp': '2025-10-01 04:27:22.193913', 'step': 7980, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:27:22.246303', 'step': 7980, 'epoch': 2} {'type': 'loss', 'content': 0.09586109220981598, 'timestamp': '2025-10-01 04:27:22.248544', 'step': 7981, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:22.314106', 'step': 7981, 'epoch': 2} {'type': 'loss', 'content': 0.2936067283153534, 'timestamp': '2025-10-01 04:27:22.316164', 'step': 7982, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:22.369201', 'step': 7982, 'epoch': 2} {'type': 'loss', 'content': 0.1266820877790451, 'timestamp': '2025-10-01 04:27:22.371399', 'step': 7983, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:22.424964', 'step': 7983, 'epoch': 2} {'type': 'loss', 'content': 0.06873118877410889, 'timestamp': '2025-10-01 04:27:22.430663', 'step': 7984, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:22.483076', 'step': 7984, 'epoch': 2} {'type': 'loss', 'content': 0.06506549566984177, 'timestamp': '2025-10-01 04:27:22.490865', 'step': 7985, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:22.544941', 'step': 7985, 'epoch': 2} {'type': 'loss', 'content': 0.19223973155021667, 'timestamp': '2025-10-01 04:27:22.547312', 'step': 7986, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:22.600922', 'step': 7986, 'epoch': 2} {'type': 'loss', 'content': 0.14731836318969727, 'timestamp': '2025-10-01 04:27:22.603505', 'step': 7987, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:27:22.659929', 'step': 7987, 'epoch': 2} {'type': 'loss', 'content': 0.1383257359266281, 'timestamp': '2025-10-01 04:27:22.666604', 'step': 7988, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:22.720148', 'step': 7988, 'epoch': 2} {'type': 'loss', 'content': 0.09089317172765732, 'timestamp': '2025-10-01 04:27:22.722920', 'step': 7989, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:22.780137', 'step': 7989, 'epoch': 2} {'type': 'loss', 'content': 0.15439511835575104, 'timestamp': '2025-10-01 04:27:22.782577', 'step': 7990, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:22.836885', 'step': 7990, 'epoch': 2} {'type': 'loss', 'content': 0.11664863675832748, 'timestamp': '2025-10-01 04:27:22.839383', 'step': 7991, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:22.895801', 'step': 7991, 'epoch': 2} {'type': 'loss', 'content': 0.07717392593622208, 'timestamp': '2025-10-01 04:27:22.901600', 'step': 7992, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:27:22.955798', 'step': 7992, 'epoch': 2} {'type': 'loss', 'content': 0.14294199645519257, 'timestamp': '2025-10-01 04:27:22.958430', 'step': 7993, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:27:23.012558', 'step': 7993, 'epoch': 2} {'type': 'loss', 'content': 0.1219363734126091, 'timestamp': '2025-10-01 04:27:23.015274', 'step': 7994, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:23.069496', 'step': 7994, 'epoch': 2} {'type': 'loss', 'content': 0.23651866614818573, 'timestamp': '2025-10-01 04:27:23.077234', 'step': 7995, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:23.134430', 'step': 7995, 'epoch': 2} {'type': 'loss', 'content': 0.16672198474407196, 'timestamp': '2025-10-01 04:27:23.140858', 'step': 7996, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:23.194433', 'step': 7996, 'epoch': 2} {'type': 'loss', 'content': 0.08769486099481583, 'timestamp': '2025-10-01 04:27:23.197085', 'step': 7997, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:23.251312', 'step': 7997, 'epoch': 2} {'type': 'loss', 'content': 0.08373358845710754, 'timestamp': '2025-10-01 04:27:23.257794', 'step': 7998, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:23.314325', 'step': 7998, 'epoch': 2} {'type': 'loss', 'content': 0.21018600463867188, 'timestamp': '2025-10-01 04:27:23.316890', 'step': 7999, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:23.371400', 'step': 7999, 'epoch': 2} {'type': 'loss', 'content': 0.14009007811546326, 'timestamp': '2025-10-01 04:27:23.377700', 'step': 8000, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 8000', 'timestamp': '2025-10-01 04:27:23.784550', 'step': 8000, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:23.839675', 'step': 8000, 'epoch': 2} {'type': 'loss', 'content': 0.11606806516647339, 'timestamp': '2025-10-01 04:27:23.841952', 'step': 8001, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:23.894886', 'step': 8001, 'epoch': 2} {'type': 'loss', 'content': 0.13415108621120453, 'timestamp': '2025-10-01 04:27:23.910069', 'step': 8002, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:23.963295', 'step': 8002, 'epoch': 2} {'type': 'loss', 'content': 0.09930820018053055, 'timestamp': '2025-10-01 04:27:23.965672', 'step': 8003, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:24.019274', 'step': 8003, 'epoch': 2} {'type': 'loss', 'content': 0.10212784260511398, 'timestamp': '2025-10-01 04:27:24.025701', 'step': 8004, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:24.078154', 'step': 8004, 'epoch': 2} {'type': 'loss', 'content': 0.13011308014392853, 'timestamp': '2025-10-01 04:27:24.080821', 'step': 8005, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:24.134151', 'step': 8005, 'epoch': 2} {'type': 'loss', 'content': 0.17535342276096344, 'timestamp': '2025-10-01 04:27:24.136390', 'step': 8006, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:24.191540', 'step': 8006, 'epoch': 2} {'type': 'loss', 'content': 0.14809395372867584, 'timestamp': '2025-10-01 04:27:24.193939', 'step': 8007, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:24.246948', 'step': 8007, 'epoch': 2} {'type': 'loss', 'content': 0.20533402264118195, 'timestamp': '2025-10-01 04:27:24.252844', 'step': 8008, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:24.307820', 'step': 8008, 'epoch': 2} {'type': 'loss', 'content': 0.16815680265426636, 'timestamp': '2025-10-01 04:27:24.310094', 'step': 8009, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:24.363746', 'step': 8009, 'epoch': 2} {'type': 'loss', 'content': 0.0936509519815445, 'timestamp': '2025-10-01 04:27:24.366419', 'step': 8010, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:24.419947', 'step': 8010, 'epoch': 2} {'type': 'loss', 'content': 0.11872343719005585, 'timestamp': '2025-10-01 04:27:24.422259', 'step': 8011, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:24.475691', 'step': 8011, 'epoch': 2} {'type': 'loss', 'content': 0.18606135249137878, 'timestamp': '2025-10-01 04:27:24.481352', 'step': 8012, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:24.534907', 'step': 8012, 'epoch': 2} {'type': 'loss', 'content': 0.13032487034797668, 'timestamp': '2025-10-01 04:27:24.537193', 'step': 8013, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:24.590723', 'step': 8013, 'epoch': 2} {'type': 'loss', 'content': 0.15825974941253662, 'timestamp': '2025-10-01 04:27:24.595181', 'step': 8014, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:24.650086', 'step': 8014, 'epoch': 2} {'type': 'loss', 'content': 0.11976206302642822, 'timestamp': '2025-10-01 04:27:24.652382', 'step': 8015, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:24.705878', 'step': 8015, 'epoch': 2} {'type': 'loss', 'content': 0.10162711143493652, 'timestamp': '2025-10-01 04:27:24.711505', 'step': 8016, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:24.764232', 'step': 8016, 'epoch': 2} {'type': 'loss', 'content': 0.10593202710151672, 'timestamp': '2025-10-01 04:27:24.766924', 'step': 8017, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:24.819673', 'step': 8017, 'epoch': 2} {'type': 'loss', 'content': 0.07300641387701035, 'timestamp': '2025-10-01 04:27:24.821954', 'step': 8018, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:24.875943', 'step': 8018, 'epoch': 2} {'type': 'loss', 'content': 0.12083692103624344, 'timestamp': '2025-10-01 04:27:24.878029', 'step': 8019, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:24.932040', 'step': 8019, 'epoch': 2} {'type': 'loss', 'content': 0.13386143743991852, 'timestamp': '2025-10-01 04:27:24.939766', 'step': 8020, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:24.992934', 'step': 8020, 'epoch': 2} {'type': 'loss', 'content': 0.16958793997764587, 'timestamp': '2025-10-01 04:27:24.995108', 'step': 8021, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:25.049141', 'step': 8021, 'epoch': 2} {'type': 'loss', 'content': 0.09066513180732727, 'timestamp': '2025-10-01 04:27:25.051318', 'step': 8022, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:25.105724', 'step': 8022, 'epoch': 2} {'type': 'loss', 'content': 0.12428689002990723, 'timestamp': '2025-10-01 04:27:25.108659', 'step': 8023, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:25.162366', 'step': 8023, 'epoch': 2} {'type': 'loss', 'content': 0.20868417620658875, 'timestamp': '2025-10-01 04:27:25.168755', 'step': 8024, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:27:25.223108', 'step': 8024, 'epoch': 2} {'type': 'loss', 'content': 0.21947592496871948, 'timestamp': '2025-10-01 04:27:25.227130', 'step': 8025, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:25.282332', 'step': 8025, 'epoch': 2} {'type': 'loss', 'content': 0.18838581442832947, 'timestamp': '2025-10-01 04:27:25.284778', 'step': 8026, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:25.339026', 'step': 8026, 'epoch': 2} {'type': 'loss', 'content': 0.23364603519439697, 'timestamp': '2025-10-01 04:27:25.341679', 'step': 8027, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:25.395574', 'step': 8027, 'epoch': 2} {'type': 'loss', 'content': 0.12271736562252045, 'timestamp': '2025-10-01 04:27:25.401601', 'step': 8028, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:25.455602', 'step': 8028, 'epoch': 2} {'type': 'loss', 'content': 0.18019168078899384, 'timestamp': '2025-10-01 04:27:25.468069', 'step': 8029, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:25.522645', 'step': 8029, 'epoch': 2} {'type': 'loss', 'content': 0.13061952590942383, 'timestamp': '2025-10-01 04:27:25.524935', 'step': 8030, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:25.578932', 'step': 8030, 'epoch': 2} {'type': 'loss', 'content': 0.15137463808059692, 'timestamp': '2025-10-01 04:27:25.581255', 'step': 8031, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:25.635482', 'step': 8031, 'epoch': 2} {'type': 'loss', 'content': 0.12054068595170975, 'timestamp': '2025-10-01 04:27:25.642850', 'step': 8032, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:25.695812', 'step': 8032, 'epoch': 2} {'type': 'loss', 'content': 0.05721375346183777, 'timestamp': '2025-10-01 04:27:25.698136', 'step': 8033, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:25.754042', 'step': 8033, 'epoch': 2} {'type': 'loss', 'content': 0.2927076816558838, 'timestamp': '2025-10-01 04:27:25.756309', 'step': 8034, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:25.810671', 'step': 8034, 'epoch': 2} {'type': 'loss', 'content': 0.06074653938412666, 'timestamp': '2025-10-01 04:27:25.812843', 'step': 8035, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:25.865789', 'step': 8035, 'epoch': 2} {'type': 'loss', 'content': 0.10103432089090347, 'timestamp': '2025-10-01 04:27:25.872238', 'step': 8036, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:25.925529', 'step': 8036, 'epoch': 2} {'type': 'loss', 'content': 0.053454987704753876, 'timestamp': '2025-10-01 04:27:25.927898', 'step': 8037, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:25.981599', 'step': 8037, 'epoch': 2} {'type': 'loss', 'content': 0.21194849908351898, 'timestamp': '2025-10-01 04:27:25.988891', 'step': 8038, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:26.042848', 'step': 8038, 'epoch': 2} {'type': 'loss', 'content': 0.19732148945331573, 'timestamp': '2025-10-01 04:27:26.045150', 'step': 8039, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:26.098779', 'step': 8039, 'epoch': 2} {'type': 'loss', 'content': 0.14846377074718475, 'timestamp': '2025-10-01 04:27:26.104539', 'step': 8040, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:26.157873', 'step': 8040, 'epoch': 2} {'type': 'loss', 'content': 0.11565493047237396, 'timestamp': '2025-10-01 04:27:26.160818', 'step': 8041, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:26.215217', 'step': 8041, 'epoch': 2} {'type': 'loss', 'content': 0.08925206959247589, 'timestamp': '2025-10-01 04:27:26.217322', 'step': 8042, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:26.271228', 'step': 8042, 'epoch': 2} {'type': 'loss', 'content': 0.10016396641731262, 'timestamp': '2025-10-01 04:27:26.273524', 'step': 8043, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:26.327620', 'step': 8043, 'epoch': 2} {'type': 'loss', 'content': 0.18683530390262604, 'timestamp': '2025-10-01 04:27:26.333366', 'step': 8044, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:26.386018', 'step': 8044, 'epoch': 2} {'type': 'loss', 'content': 0.1588352918624878, 'timestamp': '2025-10-01 04:27:26.388420', 'step': 8045, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-01 04:27:26.452306', 'step': 8045, 'epoch': 2} {'type': 'loss', 'content': 0.10720934718847275, 'timestamp': '2025-10-01 04:27:26.454722', 'step': 8046, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:26.509621', 'step': 8046, 'epoch': 2} {'type': 'loss', 'content': 0.11787001043558121, 'timestamp': '2025-10-01 04:27:26.512279', 'step': 8047, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:26.568131', 'step': 8047, 'epoch': 2} {'type': 'loss', 'content': 0.13092589378356934, 'timestamp': '2025-10-01 04:27:26.574569', 'step': 8048, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:26.627678', 'step': 8048, 'epoch': 2} {'type': 'loss', 'content': 0.2212730050086975, 'timestamp': '2025-10-01 04:27:26.629906', 'step': 8049, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:26.683259', 'step': 8049, 'epoch': 2} {'type': 'loss', 'content': 0.1375613659620285, 'timestamp': '2025-10-01 04:27:26.685625', 'step': 8050, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:27:26.739724', 'step': 8050, 'epoch': 2} {'type': 'loss', 'content': 0.15106597542762756, 'timestamp': '2025-10-01 04:27:26.741998', 'step': 8051, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-01 04:27:26.795815', 'step': 8051, 'epoch': 2} {'type': 'loss', 'content': 0.15242105722427368, 'timestamp': '2025-10-01 04:27:26.801827', 'step': 8052, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:26.854379', 'step': 8052, 'epoch': 2} {'type': 'loss', 'content': 0.20678363740444183, 'timestamp': '2025-10-01 04:27:26.856639', 'step': 8053, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:26.910554', 'step': 8053, 'epoch': 2} {'type': 'loss', 'content': 0.09711691737174988, 'timestamp': '2025-10-01 04:27:26.912859', 'step': 8054, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:26.966559', 'step': 8054, 'epoch': 2} {'type': 'loss', 'content': 0.14103637635707855, 'timestamp': '2025-10-01 04:27:26.968889', 'step': 8055, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:27.025462', 'step': 8055, 'epoch': 2} {'type': 'loss', 'content': 0.2700839936733246, 'timestamp': '2025-10-01 04:27:27.031703', 'step': 8056, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:27.084238', 'step': 8056, 'epoch': 2} {'type': 'loss', 'content': 0.15167245268821716, 'timestamp': '2025-10-01 04:27:27.087234', 'step': 8057, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:27.141047', 'step': 8057, 'epoch': 2} {'type': 'loss', 'content': 0.16230015456676483, 'timestamp': '2025-10-01 04:27:27.143213', 'step': 8058, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:27.197022', 'step': 8058, 'epoch': 2} {'type': 'loss', 'content': 0.15417441725730896, 'timestamp': '2025-10-01 04:27:27.199133', 'step': 8059, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:27:27.268897', 'step': 8059, 'epoch': 2} {'type': 'loss', 'content': 0.17369483411312103, 'timestamp': '2025-10-01 04:27:27.274701', 'step': 8060, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:27.328379', 'step': 8060, 'epoch': 2} {'type': 'loss', 'content': 0.08068986982107162, 'timestamp': '2025-10-01 04:27:27.331753', 'step': 8061, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:27.385926', 'step': 8061, 'epoch': 2} {'type': 'loss', 'content': 0.2642821967601776, 'timestamp': '2025-10-01 04:27:27.388350', 'step': 8062, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:27.455045', 'step': 8062, 'epoch': 2} {'type': 'loss', 'content': 0.16903351247310638, 'timestamp': '2025-10-01 04:27:27.457164', 'step': 8063, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:27.526574', 'step': 8063, 'epoch': 2} {'type': 'loss', 'content': 0.1692342311143875, 'timestamp': '2025-10-01 04:27:27.534227', 'step': 8064, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:27.587156', 'step': 8064, 'epoch': 2} {'type': 'loss', 'content': 0.1243968978524208, 'timestamp': '2025-10-01 04:27:27.589363', 'step': 8065, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:27.648296', 'step': 8065, 'epoch': 2} {'type': 'loss', 'content': 0.10587486624717712, 'timestamp': '2025-10-01 04:27:27.650778', 'step': 8066, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:27.705861', 'step': 8066, 'epoch': 2} {'type': 'loss', 'content': 0.07092271000146866, 'timestamp': '2025-10-01 04:27:27.708377', 'step': 8067, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:27.762949', 'step': 8067, 'epoch': 2} {'type': 'loss', 'content': 0.13851244747638702, 'timestamp': '2025-10-01 04:27:27.769331', 'step': 8068, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:27.823153', 'step': 8068, 'epoch': 2} {'type': 'loss', 'content': 0.06478291749954224, 'timestamp': '2025-10-01 04:27:27.825343', 'step': 8069, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:27.879072', 'step': 8069, 'epoch': 2} {'type': 'loss', 'content': 0.17378582060337067, 'timestamp': '2025-10-01 04:27:27.881408', 'step': 8070, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:27.935052', 'step': 8070, 'epoch': 2} {'type': 'loss', 'content': 0.168355330824852, 'timestamp': '2025-10-01 04:27:27.937137', 'step': 8071, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:27.990224', 'step': 8071, 'epoch': 2} {'type': 'loss', 'content': 0.18306584656238556, 'timestamp': '2025-10-01 04:27:27.996575', 'step': 8072, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:28.051188', 'step': 8072, 'epoch': 2} {'type': 'loss', 'content': 0.11259226500988007, 'timestamp': '2025-10-01 04:27:28.053372', 'step': 8073, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:28.107032', 'step': 8073, 'epoch': 2} {'type': 'loss', 'content': 0.09893215447664261, 'timestamp': '2025-10-01 04:27:28.109257', 'step': 8074, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:28.162915', 'step': 8074, 'epoch': 2} {'type': 'loss', 'content': 0.2302803248167038, 'timestamp': '2025-10-01 04:27:28.165106', 'step': 8075, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:28.219086', 'step': 8075, 'epoch': 2} {'type': 'loss', 'content': 0.13540969789028168, 'timestamp': '2025-10-01 04:27:28.225383', 'step': 8076, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:28.278170', 'step': 8076, 'epoch': 2} {'type': 'loss', 'content': 0.18004819750785828, 'timestamp': '2025-10-01 04:27:28.280363', 'step': 8077, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:28.333934', 'step': 8077, 'epoch': 2} {'type': 'loss', 'content': 0.19284984469413757, 'timestamp': '2025-10-01 04:27:28.338212', 'step': 8078, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:28.393858', 'step': 8078, 'epoch': 2} {'type': 'loss', 'content': 0.13703332841396332, 'timestamp': '2025-10-01 04:27:28.395989', 'step': 8079, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:27:28.449171', 'step': 8079, 'epoch': 2} {'type': 'loss', 'content': 0.18604201078414917, 'timestamp': '2025-10-01 04:27:28.455397', 'step': 8080, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:28.508309', 'step': 8080, 'epoch': 2} {'type': 'loss', 'content': 0.16196191310882568, 'timestamp': '2025-10-01 04:27:28.510757', 'step': 8081, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:28.564850', 'step': 8081, 'epoch': 2} {'type': 'loss', 'content': 0.18991053104400635, 'timestamp': '2025-10-01 04:27:28.567314', 'step': 8082, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:28.621190', 'step': 8082, 'epoch': 2} {'type': 'loss', 'content': 0.1801716834306717, 'timestamp': '2025-10-01 04:27:28.623479', 'step': 8083, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:28.677584', 'step': 8083, 'epoch': 2} {'type': 'loss', 'content': 0.1496601700782776, 'timestamp': '2025-10-01 04:27:28.683790', 'step': 8084, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:28.737471', 'step': 8084, 'epoch': 2} {'type': 'loss', 'content': 0.10615415126085281, 'timestamp': '2025-10-01 04:27:28.739895', 'step': 8085, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:28.793207', 'step': 8085, 'epoch': 2} {'type': 'loss', 'content': 0.19627191126346588, 'timestamp': '2025-10-01 04:27:28.795320', 'step': 8086, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:28.849948', 'step': 8086, 'epoch': 2} {'type': 'loss', 'content': 0.252475768327713, 'timestamp': '2025-10-01 04:27:28.852262', 'step': 8087, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:27:28.907885', 'step': 8087, 'epoch': 2} {'type': 'loss', 'content': 0.1292191743850708, 'timestamp': '2025-10-01 04:27:28.914023', 'step': 8088, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:28.967024', 'step': 8088, 'epoch': 2} {'type': 'loss', 'content': 0.19717653095722198, 'timestamp': '2025-10-01 04:27:28.969518', 'step': 8089, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:29.022321', 'step': 8089, 'epoch': 2} {'type': 'loss', 'content': 0.07814696431159973, 'timestamp': '2025-10-01 04:27:29.024485', 'step': 8090, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:29.077728', 'step': 8090, 'epoch': 2} {'type': 'loss', 'content': 0.1170843169093132, 'timestamp': '2025-10-01 04:27:29.080260', 'step': 8091, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:29.133962', 'step': 8091, 'epoch': 2} {'type': 'loss', 'content': 0.15831489861011505, 'timestamp': '2025-10-01 04:27:29.140089', 'step': 8092, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:29.195415', 'step': 8092, 'epoch': 2} {'type': 'loss', 'content': 0.16675032675266266, 'timestamp': '2025-10-01 04:27:29.197813', 'step': 8093, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:29.254955', 'step': 8093, 'epoch': 2} {'type': 'loss', 'content': 0.19119752943515778, 'timestamp': '2025-10-01 04:27:29.257280', 'step': 8094, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:29.315212', 'step': 8094, 'epoch': 2} {'type': 'loss', 'content': 0.12216431647539139, 'timestamp': '2025-10-01 04:27:29.318210', 'step': 8095, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:29.374986', 'step': 8095, 'epoch': 2} {'type': 'loss', 'content': 0.16595283150672913, 'timestamp': '2025-10-01 04:27:29.381882', 'step': 8096, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:29.439533', 'step': 8096, 'epoch': 2} {'type': 'loss', 'content': 0.10968003422021866, 'timestamp': '2025-10-01 04:27:29.441816', 'step': 8097, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:29.498506', 'step': 8097, 'epoch': 2} {'type': 'loss', 'content': 0.16122035682201385, 'timestamp': '2025-10-01 04:27:29.500907', 'step': 8098, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:29.563116', 'step': 8098, 'epoch': 2} {'type': 'loss', 'content': 0.15406575798988342, 'timestamp': '2025-10-01 04:27:29.565352', 'step': 8099, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:29.620283', 'step': 8099, 'epoch': 2} {'type': 'loss', 'content': 0.10153481364250183, 'timestamp': '2025-10-01 04:27:29.626954', 'step': 8100, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:29.682838', 'step': 8100, 'epoch': 2} {'type': 'loss', 'content': 0.13990198075771332, 'timestamp': '2025-10-01 04:27:29.684975', 'step': 8101, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:29.740498', 'step': 8101, 'epoch': 2} {'type': 'loss', 'content': 0.06956995278596878, 'timestamp': '2025-10-01 04:27:29.743137', 'step': 8102, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:29.799148', 'step': 8102, 'epoch': 2} {'type': 'loss', 'content': 0.18003562092781067, 'timestamp': '2025-10-01 04:27:29.801446', 'step': 8103, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:29.857729', 'step': 8103, 'epoch': 2} {'type': 'loss', 'content': 0.14000657200813293, 'timestamp': '2025-10-01 04:27:29.868271', 'step': 8104, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:29.921065', 'step': 8104, 'epoch': 2} {'type': 'loss', 'content': 0.15479226410388947, 'timestamp': '2025-10-01 04:27:29.923206', 'step': 8105, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:29.976531', 'step': 8105, 'epoch': 2} {'type': 'loss', 'content': 0.07604512572288513, 'timestamp': '2025-10-01 04:27:29.979023', 'step': 8106, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:30.031664', 'step': 8106, 'epoch': 2} {'type': 'loss', 'content': 0.20376479625701904, 'timestamp': '2025-10-01 04:27:30.039502', 'step': 8107, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:30.092804', 'step': 8107, 'epoch': 2} {'type': 'loss', 'content': 0.20218059420585632, 'timestamp': '2025-10-01 04:27:30.101182', 'step': 8108, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:30.154131', 'step': 8108, 'epoch': 2} {'type': 'loss', 'content': 0.16556191444396973, 'timestamp': '2025-10-01 04:27:30.156302', 'step': 8109, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:30.209168', 'step': 8109, 'epoch': 2} {'type': 'loss', 'content': 0.11064974218606949, 'timestamp': '2025-10-01 04:27:30.211610', 'step': 8110, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:27:30.270266', 'step': 8110, 'epoch': 2} {'type': 'loss', 'content': 0.2461155503988266, 'timestamp': '2025-10-01 04:27:30.272536', 'step': 8111, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:30.326711', 'step': 8111, 'epoch': 2} {'type': 'loss', 'content': 0.06201622262597084, 'timestamp': '2025-10-01 04:27:30.332583', 'step': 8112, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:30.391238', 'step': 8112, 'epoch': 2} {'type': 'loss', 'content': 0.1624228060245514, 'timestamp': '2025-10-01 04:27:30.393340', 'step': 8113, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:30.446643', 'step': 8113, 'epoch': 2} {'type': 'loss', 'content': 0.14146053791046143, 'timestamp': '2025-10-01 04:27:30.448822', 'step': 8114, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:30.502014', 'step': 8114, 'epoch': 2} {'type': 'loss', 'content': 0.19782139360904694, 'timestamp': '2025-10-01 04:27:30.504062', 'step': 8115, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:30.557504', 'step': 8115, 'epoch': 2} {'type': 'loss', 'content': 0.09055667370557785, 'timestamp': '2025-10-01 04:27:30.563154', 'step': 8116, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:30.615714', 'step': 8116, 'epoch': 2} {'type': 'loss', 'content': 0.10618657618761063, 'timestamp': '2025-10-01 04:27:30.617782', 'step': 8117, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:30.682765', 'step': 8117, 'epoch': 2} {'type': 'loss', 'content': 0.1013910248875618, 'timestamp': '2025-10-01 04:27:30.684706', 'step': 8118, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:30.741803', 'step': 8118, 'epoch': 2} {'type': 'loss', 'content': 0.13249260187149048, 'timestamp': '2025-10-01 04:27:30.744075', 'step': 8119, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:30.797186', 'step': 8119, 'epoch': 2} {'type': 'loss', 'content': 0.12855976819992065, 'timestamp': '2025-10-01 04:27:30.802877', 'step': 8120, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:30.866893', 'step': 8120, 'epoch': 2} {'type': 'loss', 'content': 0.16798768937587738, 'timestamp': '2025-10-01 04:27:30.868955', 'step': 8121, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:30.921837', 'step': 8121, 'epoch': 2} {'type': 'loss', 'content': 0.15489442646503448, 'timestamp': '2025-10-01 04:27:30.924087', 'step': 8122, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:30.980924', 'step': 8122, 'epoch': 2} {'type': 'loss', 'content': 0.22536353766918182, 'timestamp': '2025-10-01 04:27:30.983079', 'step': 8123, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:31.035843', 'step': 8123, 'epoch': 2} {'type': 'loss', 'content': 0.13995511829853058, 'timestamp': '2025-10-01 04:27:31.041559', 'step': 8124, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:31.094333', 'step': 8124, 'epoch': 2} {'type': 'loss', 'content': 0.18760143220424652, 'timestamp': '2025-10-01 04:27:31.096603', 'step': 8125, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:31.150002', 'step': 8125, 'epoch': 2} {'type': 'loss', 'content': 0.12663398683071136, 'timestamp': '2025-10-01 04:27:31.152282', 'step': 8126, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:31.213718', 'step': 8126, 'epoch': 2} {'type': 'loss', 'content': 0.2311512529850006, 'timestamp': '2025-10-01 04:27:31.216564', 'step': 8127, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:31.270882', 'step': 8127, 'epoch': 2} {'type': 'loss', 'content': 0.1689063161611557, 'timestamp': '2025-10-01 04:27:31.282585', 'step': 8128, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:31.335921', 'step': 8128, 'epoch': 2} {'type': 'loss', 'content': 0.2744986116886139, 'timestamp': '2025-10-01 04:27:31.338269', 'step': 8129, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:27:31.392872', 'step': 8129, 'epoch': 2} {'type': 'loss', 'content': 0.15382318198680878, 'timestamp': '2025-10-01 04:27:31.407275', 'step': 8130, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:31.461222', 'step': 8130, 'epoch': 2} {'type': 'loss', 'content': 0.10836359113454819, 'timestamp': '2025-10-01 04:27:31.463937', 'step': 8131, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:31.516785', 'step': 8131, 'epoch': 2} {'type': 'loss', 'content': 0.08824869245290756, 'timestamp': '2025-10-01 04:27:31.523015', 'step': 8132, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:31.576798', 'step': 8132, 'epoch': 2} {'type': 'loss', 'content': 0.13806453347206116, 'timestamp': '2025-10-01 04:27:31.583947', 'step': 8133, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:31.638246', 'step': 8133, 'epoch': 2} {'type': 'loss', 'content': 0.11471635103225708, 'timestamp': '2025-10-01 04:27:31.640766', 'step': 8134, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:31.702784', 'step': 8134, 'epoch': 2} {'type': 'loss', 'content': 0.17284870147705078, 'timestamp': '2025-10-01 04:27:31.705166', 'step': 8135, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:27:31.759256', 'step': 8135, 'epoch': 2} {'type': 'loss', 'content': 0.10229900479316711, 'timestamp': '2025-10-01 04:27:31.765522', 'step': 8136, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:31.824352', 'step': 8136, 'epoch': 2} {'type': 'loss', 'content': 0.11293641477823257, 'timestamp': '2025-10-01 04:27:31.826599', 'step': 8137, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:31.880653', 'step': 8137, 'epoch': 2} {'type': 'loss', 'content': 0.16385094821453094, 'timestamp': '2025-10-01 04:27:31.883368', 'step': 8138, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:31.938311', 'step': 8138, 'epoch': 2} {'type': 'loss', 'content': 0.13999372720718384, 'timestamp': '2025-10-01 04:27:31.940999', 'step': 8139, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:31.995454', 'step': 8139, 'epoch': 2} {'type': 'loss', 'content': 0.1254211813211441, 'timestamp': '2025-10-01 04:27:32.002870', 'step': 8140, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:32.057926', 'step': 8140, 'epoch': 2} {'type': 'loss', 'content': 0.12871044874191284, 'timestamp': '2025-10-01 04:27:32.060263', 'step': 8141, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:32.115121', 'step': 8141, 'epoch': 2} {'type': 'loss', 'content': 0.1340429186820984, 'timestamp': '2025-10-01 04:27:32.120391', 'step': 8142, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:32.175400', 'step': 8142, 'epoch': 2} {'type': 'loss', 'content': 0.14788880944252014, 'timestamp': '2025-10-01 04:27:32.178542', 'step': 8143, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:32.232994', 'step': 8143, 'epoch': 2} {'type': 'loss', 'content': 0.09003820270299911, 'timestamp': '2025-10-01 04:27:32.238857', 'step': 8144, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:32.295507', 'step': 8144, 'epoch': 2} {'type': 'loss', 'content': 0.16291889548301697, 'timestamp': '2025-10-01 04:27:32.297861', 'step': 8145, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:32.352009', 'step': 8145, 'epoch': 2} {'type': 'loss', 'content': 0.14912240207195282, 'timestamp': '2025-10-01 04:27:32.354661', 'step': 8146, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:32.413428', 'step': 8146, 'epoch': 2} {'type': 'loss', 'content': 0.0907839685678482, 'timestamp': '2025-10-01 04:27:32.415890', 'step': 8147, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:32.469817', 'step': 8147, 'epoch': 2} {'type': 'loss', 'content': 0.10352377593517303, 'timestamp': '2025-10-01 04:27:32.476167', 'step': 8148, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:32.529420', 'step': 8148, 'epoch': 2} {'type': 'loss', 'content': 0.14776402711868286, 'timestamp': '2025-10-01 04:27:32.533038', 'step': 8149, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:32.588552', 'step': 8149, 'epoch': 2} {'type': 'loss', 'content': 0.17982397973537445, 'timestamp': '2025-10-01 04:27:32.591303', 'step': 8150, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:32.645970', 'step': 8150, 'epoch': 2} {'type': 'loss', 'content': 0.09377191215753555, 'timestamp': '2025-10-01 04:27:32.648464', 'step': 8151, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:32.702665', 'step': 8151, 'epoch': 2} {'type': 'loss', 'content': 0.16164124011993408, 'timestamp': '2025-10-01 04:27:32.708991', 'step': 8152, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:32.764032', 'step': 8152, 'epoch': 2} {'type': 'loss', 'content': 0.1776556372642517, 'timestamp': '2025-10-01 04:27:32.769298', 'step': 8153, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:32.825042', 'step': 8153, 'epoch': 2} {'type': 'loss', 'content': 0.2585373520851135, 'timestamp': '2025-10-01 04:27:32.827534', 'step': 8154, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:32.881558', 'step': 8154, 'epoch': 2} {'type': 'loss', 'content': 0.20953939855098724, 'timestamp': '2025-10-01 04:27:32.883834', 'step': 8155, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:32.939590', 'step': 8155, 'epoch': 2} {'type': 'loss', 'content': 0.12714016437530518, 'timestamp': '2025-10-01 04:27:32.949394', 'step': 8156, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:33.001991', 'step': 8156, 'epoch': 2} {'type': 'loss', 'content': 0.13637293875217438, 'timestamp': '2025-10-01 04:27:33.004183', 'step': 8157, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:33.058218', 'step': 8157, 'epoch': 2} {'type': 'loss', 'content': 0.1451563537120819, 'timestamp': '2025-10-01 04:27:33.061541', 'step': 8158, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:33.118286', 'step': 8158, 'epoch': 2} {'type': 'loss', 'content': 0.1600901484489441, 'timestamp': '2025-10-01 04:27:33.120397', 'step': 8159, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:33.173380', 'step': 8159, 'epoch': 2} {'type': 'loss', 'content': 0.08165513724088669, 'timestamp': '2025-10-01 04:27:33.179068', 'step': 8160, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:33.231573', 'step': 8160, 'epoch': 2} {'type': 'loss', 'content': 0.09792401641607285, 'timestamp': '2025-10-01 04:27:33.233773', 'step': 8161, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:33.286193', 'step': 8161, 'epoch': 2} {'type': 'loss', 'content': 0.06885425746440887, 'timestamp': '2025-10-01 04:27:33.288429', 'step': 8162, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:33.342494', 'step': 8162, 'epoch': 2} {'type': 'loss', 'content': 0.15436327457427979, 'timestamp': '2025-10-01 04:27:33.344788', 'step': 8163, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:33.398068', 'step': 8163, 'epoch': 2} {'type': 'loss', 'content': 0.2313094586133957, 'timestamp': '2025-10-01 04:27:33.403795', 'step': 8164, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:33.456195', 'step': 8164, 'epoch': 2} {'type': 'loss', 'content': 0.1435636729001999, 'timestamp': '2025-10-01 04:27:33.458544', 'step': 8165, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:33.512810', 'step': 8165, 'epoch': 2} {'type': 'loss', 'content': 0.17904700338840485, 'timestamp': '2025-10-01 04:27:33.521364', 'step': 8166, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:33.574328', 'step': 8166, 'epoch': 2} {'type': 'loss', 'content': 0.24598583579063416, 'timestamp': '2025-10-01 04:27:33.577093', 'step': 8167, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:33.630193', 'step': 8167, 'epoch': 2} {'type': 'loss', 'content': 0.13195563852787018, 'timestamp': '2025-10-01 04:27:33.635626', 'step': 8168, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:33.689184', 'step': 8168, 'epoch': 2} {'type': 'loss', 'content': 0.09370006620883942, 'timestamp': '2025-10-01 04:27:33.691281', 'step': 8169, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:33.744777', 'step': 8169, 'epoch': 2} {'type': 'loss', 'content': 0.12881571054458618, 'timestamp': '2025-10-01 04:27:33.746903', 'step': 8170, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:33.800444', 'step': 8170, 'epoch': 2} {'type': 'loss', 'content': 0.15261614322662354, 'timestamp': '2025-10-01 04:27:33.802949', 'step': 8171, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:33.868061', 'step': 8171, 'epoch': 2} {'type': 'loss', 'content': 0.1276417076587677, 'timestamp': '2025-10-01 04:27:33.873655', 'step': 8172, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:33.926164', 'step': 8172, 'epoch': 2} {'type': 'loss', 'content': 0.15540800988674164, 'timestamp': '2025-10-01 04:27:33.928302', 'step': 8173, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:33.981357', 'step': 8173, 'epoch': 2} {'type': 'loss', 'content': 0.144997239112854, 'timestamp': '2025-10-01 04:27:33.983600', 'step': 8174, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:34.037235', 'step': 8174, 'epoch': 2} {'type': 'loss', 'content': 0.09739694744348526, 'timestamp': '2025-10-01 04:27:34.039309', 'step': 8175, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:34.093189', 'step': 8175, 'epoch': 2} {'type': 'loss', 'content': 0.10115702450275421, 'timestamp': '2025-10-01 04:27:34.099030', 'step': 8176, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:27:34.156114', 'step': 8176, 'epoch': 2} {'type': 'loss', 'content': 0.08370763063430786, 'timestamp': '2025-10-01 04:27:34.158139', 'step': 8177, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:34.210699', 'step': 8177, 'epoch': 2} {'type': 'loss', 'content': 0.1694457083940506, 'timestamp': '2025-10-01 04:27:34.212850', 'step': 8178, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:34.266101', 'step': 8178, 'epoch': 2} {'type': 'loss', 'content': 0.18912780284881592, 'timestamp': '2025-10-01 04:27:34.268245', 'step': 8179, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:34.322426', 'step': 8179, 'epoch': 2} {'type': 'loss', 'content': 0.0635184794664383, 'timestamp': '2025-10-01 04:27:34.328245', 'step': 8180, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:34.381110', 'step': 8180, 'epoch': 2} {'type': 'loss', 'content': 0.07281354069709778, 'timestamp': '2025-10-01 04:27:34.383347', 'step': 8181, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:34.440455', 'step': 8181, 'epoch': 2} {'type': 'loss', 'content': 0.15655697882175446, 'timestamp': '2025-10-01 04:27:34.442687', 'step': 8182, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:34.495809', 'step': 8182, 'epoch': 2} {'type': 'loss', 'content': 0.18360449373722076, 'timestamp': '2025-10-01 04:27:34.498175', 'step': 8183, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:27:34.551919', 'step': 8183, 'epoch': 2} {'type': 'loss', 'content': 0.04226258397102356, 'timestamp': '2025-10-01 04:27:34.557804', 'step': 8184, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:34.610784', 'step': 8184, 'epoch': 2} {'type': 'loss', 'content': 0.16031000018119812, 'timestamp': '2025-10-01 04:27:34.612960', 'step': 8185, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:34.666440', 'step': 8185, 'epoch': 2} {'type': 'loss', 'content': 0.16676203906536102, 'timestamp': '2025-10-01 04:27:34.668491', 'step': 8186, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:34.721276', 'step': 8186, 'epoch': 2} {'type': 'loss', 'content': 0.17517037689685822, 'timestamp': '2025-10-01 04:27:34.723246', 'step': 8187, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:34.776623', 'step': 8187, 'epoch': 2} {'type': 'loss', 'content': 0.08041085302829742, 'timestamp': '2025-10-01 04:27:34.796161', 'step': 8188, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:34.850470', 'step': 8188, 'epoch': 2} {'type': 'loss', 'content': 0.12236592918634415, 'timestamp': '2025-10-01 04:27:34.852766', 'step': 8189, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:34.908836', 'step': 8189, 'epoch': 2} {'type': 'loss', 'content': 0.1838894784450531, 'timestamp': '2025-10-01 04:27:34.911792', 'step': 8190, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:34.966188', 'step': 8190, 'epoch': 2} {'type': 'loss', 'content': 0.14340125024318695, 'timestamp': '2025-10-01 04:27:34.968779', 'step': 8191, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:35.021732', 'step': 8191, 'epoch': 2} {'type': 'loss', 'content': 0.09895216673612595, 'timestamp': '2025-10-01 04:27:35.028376', 'step': 8192, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:35.081403', 'step': 8192, 'epoch': 2} {'type': 'loss', 'content': 0.1683465540409088, 'timestamp': '2025-10-01 04:27:35.083981', 'step': 8193, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:35.136710', 'step': 8193, 'epoch': 2} {'type': 'loss', 'content': 0.20365193486213684, 'timestamp': '2025-10-01 04:27:35.138674', 'step': 8194, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:35.191366', 'step': 8194, 'epoch': 2} {'type': 'loss', 'content': 0.22280113399028778, 'timestamp': '2025-10-01 04:27:35.193377', 'step': 8195, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:27:35.246014', 'step': 8195, 'epoch': 2} {'type': 'loss', 'content': 0.10265923291444778, 'timestamp': '2025-10-01 04:27:35.252082', 'step': 8196, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:27:35.305108', 'step': 8196, 'epoch': 2} {'type': 'loss', 'content': 0.1194036453962326, 'timestamp': '2025-10-01 04:27:35.307482', 'step': 8197, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:35.360863', 'step': 8197, 'epoch': 2} {'type': 'loss', 'content': 0.11088747531175613, 'timestamp': '2025-10-01 04:27:35.363173', 'step': 8198, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:35.416616', 'step': 8198, 'epoch': 2} {'type': 'loss', 'content': 0.13564559817314148, 'timestamp': '2025-10-01 04:27:35.418803', 'step': 8199, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:35.472345', 'step': 8199, 'epoch': 2} {'type': 'loss', 'content': 0.06652777642011642, 'timestamp': '2025-10-01 04:27:35.478151', 'step': 8200, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:35.531227', 'step': 8200, 'epoch': 2} {'type': 'loss', 'content': 0.10380593687295914, 'timestamp': '2025-10-01 04:27:35.533849', 'step': 8201, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:35.587313', 'step': 8201, 'epoch': 2} {'type': 'loss', 'content': 0.12263770401477814, 'timestamp': '2025-10-01 04:27:35.589628', 'step': 8202, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:35.642803', 'step': 8202, 'epoch': 2} {'type': 'loss', 'content': 0.21747495234012604, 'timestamp': '2025-10-01 04:27:35.644833', 'step': 8203, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:35.699844', 'step': 8203, 'epoch': 2} {'type': 'loss', 'content': 0.16957184672355652, 'timestamp': '2025-10-01 04:27:35.705686', 'step': 8204, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:35.757992', 'step': 8204, 'epoch': 2} {'type': 'loss', 'content': 0.06769605726003647, 'timestamp': '2025-10-01 04:27:35.760196', 'step': 8205, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:35.813770', 'step': 8205, 'epoch': 2} {'type': 'loss', 'content': 0.12151913344860077, 'timestamp': '2025-10-01 04:27:35.815831', 'step': 8206, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:35.869394', 'step': 8206, 'epoch': 2} {'type': 'loss', 'content': 0.20711147785186768, 'timestamp': '2025-10-01 04:27:35.871476', 'step': 8207, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:35.925288', 'step': 8207, 'epoch': 2} {'type': 'loss', 'content': 0.15681302547454834, 'timestamp': '2025-10-01 04:27:35.931231', 'step': 8208, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:27:35.984389', 'step': 8208, 'epoch': 2} {'type': 'loss', 'content': 0.15719333291053772, 'timestamp': '2025-10-01 04:27:35.986476', 'step': 8209, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:36.039565', 'step': 8209, 'epoch': 2} {'type': 'loss', 'content': 0.17547427117824554, 'timestamp': '2025-10-01 04:27:36.042201', 'step': 8210, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:36.095628', 'step': 8210, 'epoch': 2} {'type': 'loss', 'content': 0.1902587115764618, 'timestamp': '2025-10-01 04:27:36.098067', 'step': 8211, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:36.152899', 'step': 8211, 'epoch': 2} {'type': 'loss', 'content': 0.19936639070510864, 'timestamp': '2025-10-01 04:27:36.158668', 'step': 8212, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:36.211591', 'step': 8212, 'epoch': 2} {'type': 'loss', 'content': 0.1443203091621399, 'timestamp': '2025-10-01 04:27:36.226364', 'step': 8213, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:36.279616', 'step': 8213, 'epoch': 2} {'type': 'loss', 'content': 0.20781251788139343, 'timestamp': '2025-10-01 04:27:36.281889', 'step': 8214, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:36.335306', 'step': 8214, 'epoch': 2} {'type': 'loss', 'content': 0.14472296833992004, 'timestamp': '2025-10-01 04:27:36.337479', 'step': 8215, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:36.390101', 'step': 8215, 'epoch': 2} {'type': 'loss', 'content': 0.10133524239063263, 'timestamp': '2025-10-01 04:27:36.395891', 'step': 8216, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:36.448498', 'step': 8216, 'epoch': 2} {'type': 'loss', 'content': 0.1606602966785431, 'timestamp': '2025-10-01 04:27:36.450496', 'step': 8217, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:36.503447', 'step': 8217, 'epoch': 2} {'type': 'loss', 'content': 0.2825760245323181, 'timestamp': '2025-10-01 04:27:36.505535', 'step': 8218, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:36.558611', 'step': 8218, 'epoch': 2} {'type': 'loss', 'content': 0.056615110486745834, 'timestamp': '2025-10-01 04:27:36.560648', 'step': 8219, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:36.613703', 'step': 8219, 'epoch': 2} {'type': 'loss', 'content': 0.10933960229158401, 'timestamp': '2025-10-01 04:27:36.619385', 'step': 8220, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:36.671715', 'step': 8220, 'epoch': 2} {'type': 'loss', 'content': 0.09687215089797974, 'timestamp': '2025-10-01 04:27:36.673883', 'step': 8221, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:36.726716', 'step': 8221, 'epoch': 2} {'type': 'loss', 'content': 0.12211046367883682, 'timestamp': '2025-10-01 04:27:36.728827', 'step': 8222, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:36.782009', 'step': 8222, 'epoch': 2} {'type': 'loss', 'content': 0.21904627978801727, 'timestamp': '2025-10-01 04:27:36.784402', 'step': 8223, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:36.837722', 'step': 8223, 'epoch': 2} {'type': 'loss', 'content': 0.0895262137055397, 'timestamp': '2025-10-01 04:27:36.843374', 'step': 8224, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:36.895754', 'step': 8224, 'epoch': 2} {'type': 'loss', 'content': 0.24363483488559723, 'timestamp': '2025-10-01 04:27:36.897938', 'step': 8225, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:36.963339', 'step': 8225, 'epoch': 2} {'type': 'loss', 'content': 0.19034633040428162, 'timestamp': '2025-10-01 04:27:36.965601', 'step': 8226, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:27:37.019527', 'step': 8226, 'epoch': 2} {'type': 'loss', 'content': 0.08980411291122437, 'timestamp': '2025-10-01 04:27:37.021600', 'step': 8227, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:37.074912', 'step': 8227, 'epoch': 2} {'type': 'loss', 'content': 0.1545526534318924, 'timestamp': '2025-10-01 04:27:37.080294', 'step': 8228, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:37.132915', 'step': 8228, 'epoch': 2} {'type': 'loss', 'content': 0.17996402084827423, 'timestamp': '2025-10-01 04:27:37.137215', 'step': 8229, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:37.201370', 'step': 8229, 'epoch': 2} {'type': 'loss', 'content': 0.11911171674728394, 'timestamp': '2025-10-01 04:27:37.204324', 'step': 8230, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:37.257407', 'step': 8230, 'epoch': 2} {'type': 'loss', 'content': 0.11810031533241272, 'timestamp': '2025-10-01 04:27:37.259247', 'step': 8231, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:27:37.311822', 'step': 8231, 'epoch': 2} {'type': 'loss', 'content': 0.05607445910573006, 'timestamp': '2025-10-01 04:27:37.317332', 'step': 8232, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:37.369852', 'step': 8232, 'epoch': 2} {'type': 'loss', 'content': 0.12837833166122437, 'timestamp': '2025-10-01 04:27:37.371956', 'step': 8233, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:37.426115', 'step': 8233, 'epoch': 2} {'type': 'loss', 'content': 0.13454954326152802, 'timestamp': '2025-10-01 04:27:37.428173', 'step': 8234, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:37.481137', 'step': 8234, 'epoch': 2} {'type': 'loss', 'content': 0.07795291393995285, 'timestamp': '2025-10-01 04:27:37.483291', 'step': 8235, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:37.537122', 'step': 8235, 'epoch': 2} {'type': 'loss', 'content': 0.15320397913455963, 'timestamp': '2025-10-01 04:27:37.542879', 'step': 8236, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:37.595936', 'step': 8236, 'epoch': 2} {'type': 'loss', 'content': 0.20193734765052795, 'timestamp': '2025-10-01 04:27:37.598302', 'step': 8237, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:27:37.651537', 'step': 8237, 'epoch': 2} {'type': 'loss', 'content': 0.061672210693359375, 'timestamp': '2025-10-01 04:27:37.653528', 'step': 8238, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:37.706726', 'step': 8238, 'epoch': 2} {'type': 'loss', 'content': 0.18314893543720245, 'timestamp': '2025-10-01 04:27:37.708857', 'step': 8239, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:37.762582', 'step': 8239, 'epoch': 2} {'type': 'loss', 'content': 0.19414173066616058, 'timestamp': '2025-10-01 04:27:37.768640', 'step': 8240, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:37.821646', 'step': 8240, 'epoch': 2} {'type': 'loss', 'content': 0.06500702351331711, 'timestamp': '2025-10-01 04:27:37.824077', 'step': 8241, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:37.877537', 'step': 8241, 'epoch': 2} {'type': 'loss', 'content': 0.11570436507463455, 'timestamp': '2025-10-01 04:27:37.879573', 'step': 8242, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:37.932682', 'step': 8242, 'epoch': 2} {'type': 'loss', 'content': 0.14477381110191345, 'timestamp': '2025-10-01 04:27:37.934977', 'step': 8243, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:27:37.988418', 'step': 8243, 'epoch': 2} {'type': 'loss', 'content': 0.15527132153511047, 'timestamp': '2025-10-01 04:27:37.994059', 'step': 8244, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:38.046954', 'step': 8244, 'epoch': 2} {'type': 'loss', 'content': 0.11544003337621689, 'timestamp': '2025-10-01 04:27:38.049241', 'step': 8245, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:38.102183', 'step': 8245, 'epoch': 2} {'type': 'loss', 'content': 0.22430066764354706, 'timestamp': '2025-10-01 04:27:38.104237', 'step': 8246, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:38.157862', 'step': 8246, 'epoch': 2} {'type': 'loss', 'content': 0.16129866242408752, 'timestamp': '2025-10-01 04:27:38.160053', 'step': 8247, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:38.213616', 'step': 8247, 'epoch': 2} {'type': 'loss', 'content': 0.10100904852151871, 'timestamp': '2025-10-01 04:27:38.219297', 'step': 8248, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:38.271553', 'step': 8248, 'epoch': 2} {'type': 'loss', 'content': 0.09971753507852554, 'timestamp': '2025-10-01 04:27:38.273743', 'step': 8249, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:38.326721', 'step': 8249, 'epoch': 2} {'type': 'loss', 'content': 0.22902564704418182, 'timestamp': '2025-10-01 04:27:38.328900', 'step': 8250, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:38.381789', 'step': 8250, 'epoch': 2} {'type': 'loss', 'content': 0.1713992953300476, 'timestamp': '2025-10-01 04:27:38.383833', 'step': 8251, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:38.436430', 'step': 8251, 'epoch': 2} {'type': 'loss', 'content': 0.11760467290878296, 'timestamp': '2025-10-01 04:27:38.442108', 'step': 8252, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:38.494501', 'step': 8252, 'epoch': 2} {'type': 'loss', 'content': 0.12521693110466003, 'timestamp': '2025-10-01 04:27:38.500843', 'step': 8253, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:38.555719', 'step': 8253, 'epoch': 2} {'type': 'loss', 'content': 0.20452137291431427, 'timestamp': '2025-10-01 04:27:38.558661', 'step': 8254, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:38.612407', 'step': 8254, 'epoch': 2} {'type': 'loss', 'content': 0.08884876221418381, 'timestamp': '2025-10-01 04:27:38.614750', 'step': 8255, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:38.669417', 'step': 8255, 'epoch': 2} {'type': 'loss', 'content': 0.11925846338272095, 'timestamp': '2025-10-01 04:27:38.675082', 'step': 8256, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:38.727935', 'step': 8256, 'epoch': 2} {'type': 'loss', 'content': 0.07780870795249939, 'timestamp': '2025-10-01 04:27:38.730092', 'step': 8257, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:38.782995', 'step': 8257, 'epoch': 2} {'type': 'loss', 'content': 0.08108656108379364, 'timestamp': '2025-10-01 04:27:38.796522', 'step': 8258, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:38.867302', 'step': 8258, 'epoch': 2} {'type': 'loss', 'content': 0.1639217883348465, 'timestamp': '2025-10-01 04:27:38.869605', 'step': 8259, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:38.936207', 'step': 8259, 'epoch': 2} {'type': 'loss', 'content': 0.12708096206188202, 'timestamp': '2025-10-01 04:27:38.941946', 'step': 8260, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:38.995090', 'step': 8260, 'epoch': 2} {'type': 'loss', 'content': 0.14901813864707947, 'timestamp': '2025-10-01 04:27:38.997197', 'step': 8261, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:39.050311', 'step': 8261, 'epoch': 2} {'type': 'loss', 'content': 0.11167389154434204, 'timestamp': '2025-10-01 04:27:39.052385', 'step': 8262, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:39.105571', 'step': 8262, 'epoch': 2} {'type': 'loss', 'content': 0.20604002475738525, 'timestamp': '2025-10-01 04:27:39.108110', 'step': 8263, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:39.161123', 'step': 8263, 'epoch': 2} {'type': 'loss', 'content': 0.09621411561965942, 'timestamp': '2025-10-01 04:27:39.168591', 'step': 8264, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:39.221195', 'step': 8264, 'epoch': 2} {'type': 'loss', 'content': 0.05513742193579674, 'timestamp': '2025-10-01 04:27:39.224661', 'step': 8265, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:39.281173', 'step': 8265, 'epoch': 2} {'type': 'loss', 'content': 0.14024898409843445, 'timestamp': '2025-10-01 04:27:39.283976', 'step': 8266, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:39.338142', 'step': 8266, 'epoch': 2} {'type': 'loss', 'content': 0.17425687611103058, 'timestamp': '2025-10-01 04:27:39.340516', 'step': 8267, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:39.400262', 'step': 8267, 'epoch': 2} {'type': 'loss', 'content': 0.169621080160141, 'timestamp': '2025-10-01 04:27:39.406314', 'step': 8268, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:39.460907', 'step': 8268, 'epoch': 2} {'type': 'loss', 'content': 0.15896831452846527, 'timestamp': '2025-10-01 04:27:39.463851', 'step': 8269, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:39.526433', 'step': 8269, 'epoch': 2} {'type': 'loss', 'content': 0.14789210259914398, 'timestamp': '2025-10-01 04:27:39.528806', 'step': 8270, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:39.582379', 'step': 8270, 'epoch': 2} {'type': 'loss', 'content': 0.12778495252132416, 'timestamp': '2025-10-01 04:27:39.584568', 'step': 8271, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:39.638240', 'step': 8271, 'epoch': 2} {'type': 'loss', 'content': 0.19537124037742615, 'timestamp': '2025-10-01 04:27:39.653522', 'step': 8272, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:39.707015', 'step': 8272, 'epoch': 2} {'type': 'loss', 'content': 0.08219271898269653, 'timestamp': '2025-10-01 04:27:39.717201', 'step': 8273, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:39.771887', 'step': 8273, 'epoch': 2} {'type': 'loss', 'content': 0.1387217789888382, 'timestamp': '2025-10-01 04:27:39.780849', 'step': 8274, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:27:39.836462', 'step': 8274, 'epoch': 2} {'type': 'loss', 'content': 0.09554485976696014, 'timestamp': '2025-10-01 04:27:39.844085', 'step': 8275, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:39.897833', 'step': 8275, 'epoch': 2} {'type': 'loss', 'content': 0.12837357819080353, 'timestamp': '2025-10-01 04:27:39.903625', 'step': 8276, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:39.959997', 'step': 8276, 'epoch': 2} {'type': 'loss', 'content': 0.16890200972557068, 'timestamp': '2025-10-01 04:27:39.962325', 'step': 8277, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:40.015860', 'step': 8277, 'epoch': 2} {'type': 'loss', 'content': 0.14488594233989716, 'timestamp': '2025-10-01 04:27:40.018228', 'step': 8278, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:40.077917', 'step': 8278, 'epoch': 2} {'type': 'loss', 'content': 0.14840306341648102, 'timestamp': '2025-10-01 04:27:40.080269', 'step': 8279, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:40.135500', 'step': 8279, 'epoch': 2} {'type': 'loss', 'content': 0.11446962505578995, 'timestamp': '2025-10-01 04:27:40.147760', 'step': 8280, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:40.204707', 'step': 8280, 'epoch': 2} {'type': 'loss', 'content': 0.18886563181877136, 'timestamp': '2025-10-01 04:27:40.207050', 'step': 8281, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:40.261108', 'step': 8281, 'epoch': 2} {'type': 'loss', 'content': 0.10969284921884537, 'timestamp': '2025-10-01 04:27:40.263310', 'step': 8282, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:40.317791', 'step': 8282, 'epoch': 2} {'type': 'loss', 'content': 0.09998796880245209, 'timestamp': '2025-10-01 04:27:40.320322', 'step': 8283, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:40.376646', 'step': 8283, 'epoch': 2} {'type': 'loss', 'content': 0.09631045162677765, 'timestamp': '2025-10-01 04:27:40.383499', 'step': 8284, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:40.436824', 'step': 8284, 'epoch': 2} {'type': 'loss', 'content': 0.18813152611255646, 'timestamp': '2025-10-01 04:27:40.439698', 'step': 8285, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:40.502797', 'step': 8285, 'epoch': 2} {'type': 'loss', 'content': 0.079838328063488, 'timestamp': '2025-10-01 04:27:40.505338', 'step': 8286, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:40.560122', 'step': 8286, 'epoch': 2} {'type': 'loss', 'content': 0.2727016806602478, 'timestamp': '2025-10-01 04:27:40.562871', 'step': 8287, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:40.617113', 'step': 8287, 'epoch': 2} {'type': 'loss', 'content': 0.15743525326251984, 'timestamp': '2025-10-01 04:27:40.623345', 'step': 8288, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:40.676785', 'step': 8288, 'epoch': 2} {'type': 'loss', 'content': 0.10154015570878983, 'timestamp': '2025-10-01 04:27:40.678989', 'step': 8289, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:40.731627', 'step': 8289, 'epoch': 2} {'type': 'loss', 'content': 0.11225298792123795, 'timestamp': '2025-10-01 04:27:40.733881', 'step': 8290, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:40.787443', 'step': 8290, 'epoch': 2} {'type': 'loss', 'content': 0.20902560651302338, 'timestamp': '2025-10-01 04:27:40.789792', 'step': 8291, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:40.843019', 'step': 8291, 'epoch': 2} {'type': 'loss', 'content': 0.1357938051223755, 'timestamp': '2025-10-01 04:27:40.848853', 'step': 8292, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:40.901859', 'step': 8292, 'epoch': 2} {'type': 'loss', 'content': 0.08505017310380936, 'timestamp': '2025-10-01 04:27:40.903971', 'step': 8293, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:40.956845', 'step': 8293, 'epoch': 2} {'type': 'loss', 'content': 0.12455981969833374, 'timestamp': '2025-10-01 04:27:40.960261', 'step': 8294, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:41.014469', 'step': 8294, 'epoch': 2} {'type': 'loss', 'content': 0.10054223984479904, 'timestamp': '2025-10-01 04:27:41.018733', 'step': 8295, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:27:41.081359', 'step': 8295, 'epoch': 2} {'type': 'loss', 'content': 0.10075029730796814, 'timestamp': '2025-10-01 04:27:41.093293', 'step': 8296, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:41.151275', 'step': 8296, 'epoch': 2} {'type': 'loss', 'content': 0.17831261456012726, 'timestamp': '2025-10-01 04:27:41.155187', 'step': 8297, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:41.213980', 'step': 8297, 'epoch': 2} {'type': 'loss', 'content': 0.1357300579547882, 'timestamp': '2025-10-01 04:27:41.216262', 'step': 8298, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:41.269238', 'step': 8298, 'epoch': 2} {'type': 'loss', 'content': 0.2623775005340576, 'timestamp': '2025-10-01 04:27:41.271608', 'step': 8299, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:41.325195', 'step': 8299, 'epoch': 2} {'type': 'loss', 'content': 0.15809611976146698, 'timestamp': '2025-10-01 04:27:41.333150', 'step': 8300, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:41.403521', 'step': 8300, 'epoch': 2} {'type': 'loss', 'content': 0.17311492562294006, 'timestamp': '2025-10-01 04:27:41.408304', 'step': 8301, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:41.463843', 'step': 8301, 'epoch': 2} {'type': 'loss', 'content': 0.08255874365568161, 'timestamp': '2025-10-01 04:27:41.466326', 'step': 8302, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:41.524339', 'step': 8302, 'epoch': 2} {'type': 'loss', 'content': 0.17453058063983917, 'timestamp': '2025-10-01 04:27:41.527211', 'step': 8303, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:41.581105', 'step': 8303, 'epoch': 2} {'type': 'loss', 'content': 0.15701955556869507, 'timestamp': '2025-10-01 04:27:41.587184', 'step': 8304, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:41.640703', 'step': 8304, 'epoch': 2} {'type': 'loss', 'content': 0.16061986982822418, 'timestamp': '2025-10-01 04:27:41.642770', 'step': 8305, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:41.695397', 'step': 8305, 'epoch': 2} {'type': 'loss', 'content': 0.1608431339263916, 'timestamp': '2025-10-01 04:27:41.697651', 'step': 8306, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:41.751071', 'step': 8306, 'epoch': 2} {'type': 'loss', 'content': 0.13745643198490143, 'timestamp': '2025-10-01 04:27:41.754014', 'step': 8307, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:27:41.813167', 'step': 8307, 'epoch': 2} {'type': 'loss', 'content': 0.14350338280200958, 'timestamp': '2025-10-01 04:27:41.818986', 'step': 8308, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:41.871447', 'step': 8308, 'epoch': 2} {'type': 'loss', 'content': 0.13106001913547516, 'timestamp': '2025-10-01 04:27:41.873575', 'step': 8309, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:41.926405', 'step': 8309, 'epoch': 2} {'type': 'loss', 'content': 0.11300058662891388, 'timestamp': '2025-10-01 04:27:41.931788', 'step': 8310, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:41.990188', 'step': 8310, 'epoch': 2} {'type': 'loss', 'content': 0.18206468224525452, 'timestamp': '2025-10-01 04:27:42.002295', 'step': 8311, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:42.056942', 'step': 8311, 'epoch': 2} {'type': 'loss', 'content': 0.11317591369152069, 'timestamp': '2025-10-01 04:27:42.063406', 'step': 8312, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:42.119234', 'step': 8312, 'epoch': 2} {'type': 'loss', 'content': 0.046495016664266586, 'timestamp': '2025-10-01 04:27:42.121685', 'step': 8313, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:42.174761', 'step': 8313, 'epoch': 2} {'type': 'loss', 'content': 0.1320112943649292, 'timestamp': '2025-10-01 04:27:42.181310', 'step': 8314, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:42.234737', 'step': 8314, 'epoch': 2} {'type': 'loss', 'content': 0.14535658061504364, 'timestamp': '2025-10-01 04:27:42.237062', 'step': 8315, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:42.302308', 'step': 8315, 'epoch': 2} {'type': 'loss', 'content': 0.1339515894651413, 'timestamp': '2025-10-01 04:27:42.308615', 'step': 8316, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:42.362648', 'step': 8316, 'epoch': 2} {'type': 'loss', 'content': 0.17822358012199402, 'timestamp': '2025-10-01 04:27:42.364863', 'step': 8317, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:42.424044', 'step': 8317, 'epoch': 2} {'type': 'loss', 'content': 0.17117342352867126, 'timestamp': '2025-10-01 04:27:42.426265', 'step': 8318, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:42.480290', 'step': 8318, 'epoch': 2} {'type': 'loss', 'content': 0.08629093319177628, 'timestamp': '2025-10-01 04:27:42.482503', 'step': 8319, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:27:42.535406', 'step': 8319, 'epoch': 2} {'type': 'loss', 'content': 0.16833290457725525, 'timestamp': '2025-10-01 04:27:42.541182', 'step': 8320, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:42.594337', 'step': 8320, 'epoch': 2} {'type': 'loss', 'content': 0.09595982730388641, 'timestamp': '2025-10-01 04:27:42.596773', 'step': 8321, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:42.660128', 'step': 8321, 'epoch': 2} {'type': 'loss', 'content': 0.0624481625854969, 'timestamp': '2025-10-01 04:27:42.662424', 'step': 8322, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:42.716849', 'step': 8322, 'epoch': 2} {'type': 'loss', 'content': 0.2507266402244568, 'timestamp': '2025-10-01 04:27:42.719567', 'step': 8323, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:27:42.778485', 'step': 8323, 'epoch': 2} {'type': 'loss', 'content': 0.11441279947757721, 'timestamp': '2025-10-01 04:27:42.785199', 'step': 8324, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:42.840234', 'step': 8324, 'epoch': 2} {'type': 'loss', 'content': 0.16970950365066528, 'timestamp': '2025-10-01 04:27:42.842555', 'step': 8325, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:42.900399', 'step': 8325, 'epoch': 2} {'type': 'loss', 'content': 0.1697975993156433, 'timestamp': '2025-10-01 04:27:42.903067', 'step': 8326, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:42.967770', 'step': 8326, 'epoch': 2} {'type': 'loss', 'content': 0.1294339895248413, 'timestamp': '2025-10-01 04:27:42.970244', 'step': 8327, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:43.033521', 'step': 8327, 'epoch': 2} {'type': 'loss', 'content': 0.19581253826618195, 'timestamp': '2025-10-01 04:27:43.040282', 'step': 8328, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:43.095650', 'step': 8328, 'epoch': 2} {'type': 'loss', 'content': 0.10603522509336472, 'timestamp': '2025-10-01 04:27:43.098253', 'step': 8329, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:43.152886', 'step': 8329, 'epoch': 2} {'type': 'loss', 'content': 0.16982193291187286, 'timestamp': '2025-10-01 04:27:43.155105', 'step': 8330, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:43.210143', 'step': 8330, 'epoch': 2} {'type': 'loss', 'content': 0.06018083915114403, 'timestamp': '2025-10-01 04:27:43.224935', 'step': 8331, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:43.301794', 'step': 8331, 'epoch': 2} {'type': 'loss', 'content': 0.13875509798526764, 'timestamp': '2025-10-01 04:27:43.309306', 'step': 8332, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:43.420022', 'step': 8332, 'epoch': 2} {'type': 'loss', 'content': 0.12702956795692444, 'timestamp': '2025-10-01 04:27:43.429593', 'step': 8333, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:43.491402', 'step': 8333, 'epoch': 2} {'type': 'loss', 'content': 0.2367510050535202, 'timestamp': '2025-10-01 04:27:43.508017', 'step': 8334, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:43.571238', 'step': 8334, 'epoch': 2} {'type': 'loss', 'content': 0.11529479920864105, 'timestamp': '2025-10-01 04:27:43.583027', 'step': 8335, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:43.654671', 'step': 8335, 'epoch': 2} {'type': 'loss', 'content': 0.14690011739730835, 'timestamp': '2025-10-01 04:27:43.689516', 'step': 8336, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:43.770690', 'step': 8336, 'epoch': 2} {'type': 'loss', 'content': 0.12096093595027924, 'timestamp': '2025-10-01 04:27:43.800007', 'step': 8337, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:43.934698', 'step': 8337, 'epoch': 2} {'type': 'loss', 'content': 0.174373060464859, 'timestamp': '2025-10-01 04:27:43.954018', 'step': 8338, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:44.141349', 'step': 8338, 'epoch': 2} {'type': 'loss', 'content': 0.12285113334655762, 'timestamp': '2025-10-01 04:27:44.163555', 'step': 8339, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:44.356773', 'step': 8339, 'epoch': 2} {'type': 'loss', 'content': 0.10955505818128586, 'timestamp': '2025-10-01 04:27:44.374386', 'step': 8340, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:44.526073', 'step': 8340, 'epoch': 2} {'type': 'loss', 'content': 0.08962130546569824, 'timestamp': '2025-10-01 04:27:44.545452', 'step': 8341, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:44.701159', 'step': 8341, 'epoch': 2} {'type': 'loss', 'content': 0.17132465541362762, 'timestamp': '2025-10-01 04:27:44.736833', 'step': 8342, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:44.886685', 'step': 8342, 'epoch': 2} {'type': 'loss', 'content': 0.1442057490348816, 'timestamp': '2025-10-01 04:27:44.932126', 'step': 8343, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:45.084459', 'step': 8343, 'epoch': 2} {'type': 'loss', 'content': 0.14585691690444946, 'timestamp': '2025-10-01 04:27:45.111189', 'step': 8344, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:45.192101', 'step': 8344, 'epoch': 2} {'type': 'loss', 'content': 0.13700297474861145, 'timestamp': '2025-10-01 04:27:45.211220', 'step': 8345, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:45.305525', 'step': 8345, 'epoch': 2} {'type': 'loss', 'content': 0.2732768654823303, 'timestamp': '2025-10-01 04:27:45.311063', 'step': 8346, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:27:45.421239', 'step': 8346, 'epoch': 2} {'type': 'loss', 'content': 0.19147293269634247, 'timestamp': '2025-10-01 04:27:45.435209', 'step': 8347, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:45.544896', 'step': 8347, 'epoch': 2} {'type': 'loss', 'content': 0.19102667272090912, 'timestamp': '2025-10-01 04:27:45.557361', 'step': 8348, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:45.640961', 'step': 8348, 'epoch': 2} {'type': 'loss', 'content': 0.23166880011558533, 'timestamp': '2025-10-01 04:27:45.643258', 'step': 8349, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:45.697566', 'step': 8349, 'epoch': 2} {'type': 'loss', 'content': 0.06760812550783157, 'timestamp': '2025-10-01 04:27:45.700801', 'step': 8350, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:45.762793', 'step': 8350, 'epoch': 2} {'type': 'loss', 'content': 0.15151827037334442, 'timestamp': '2025-10-01 04:27:45.764723', 'step': 8351, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:45.820023', 'step': 8351, 'epoch': 2} {'type': 'loss', 'content': 0.08729627728462219, 'timestamp': '2025-10-01 04:27:45.832489', 'step': 8352, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:45.899894', 'step': 8352, 'epoch': 2} {'type': 'loss', 'content': 0.12445051223039627, 'timestamp': '2025-10-01 04:27:45.901758', 'step': 8353, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:45.954881', 'step': 8353, 'epoch': 2} {'type': 'loss', 'content': 0.13756613433361053, 'timestamp': '2025-10-01 04:27:45.957182', 'step': 8354, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:46.011366', 'step': 8354, 'epoch': 2} {'type': 'loss', 'content': 0.10446913540363312, 'timestamp': '2025-10-01 04:27:46.014903', 'step': 8355, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:46.068164', 'step': 8355, 'epoch': 2} {'type': 'loss', 'content': 0.14777158200740814, 'timestamp': '2025-10-01 04:27:46.074148', 'step': 8356, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:27:46.126605', 'step': 8356, 'epoch': 2} {'type': 'loss', 'content': 0.1287200152873993, 'timestamp': '2025-10-01 04:27:46.128965', 'step': 8357, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:46.197210', 'step': 8357, 'epoch': 2} {'type': 'loss', 'content': 0.12459684163331985, 'timestamp': '2025-10-01 04:27:46.199730', 'step': 8358, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:46.253122', 'step': 8358, 'epoch': 2} {'type': 'loss', 'content': 0.13522052764892578, 'timestamp': '2025-10-01 04:27:46.254936', 'step': 8359, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:46.308208', 'step': 8359, 'epoch': 2} {'type': 'loss', 'content': 0.2743675410747528, 'timestamp': '2025-10-01 04:27:46.316407', 'step': 8360, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:46.404867', 'step': 8360, 'epoch': 2} {'type': 'loss', 'content': 0.09401381760835648, 'timestamp': '2025-10-01 04:27:46.406777', 'step': 8361, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:46.460603', 'step': 8361, 'epoch': 2} {'type': 'loss', 'content': 0.10474810749292374, 'timestamp': '2025-10-01 04:27:46.462699', 'step': 8362, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:46.526159', 'step': 8362, 'epoch': 2} {'type': 'loss', 'content': 0.11000820249319077, 'timestamp': '2025-10-01 04:27:46.528447', 'step': 8363, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:46.584135', 'step': 8363, 'epoch': 2} {'type': 'loss', 'content': 0.10611974447965622, 'timestamp': '2025-10-01 04:27:46.589838', 'step': 8364, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:27:46.643186', 'step': 8364, 'epoch': 2} {'type': 'loss', 'content': 0.1694374680519104, 'timestamp': '2025-10-01 04:27:46.645388', 'step': 8365, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:46.699226', 'step': 8365, 'epoch': 2} {'type': 'loss', 'content': 0.18097226321697235, 'timestamp': '2025-10-01 04:27:46.701405', 'step': 8366, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:46.754429', 'step': 8366, 'epoch': 2} {'type': 'loss', 'content': 0.1296585500240326, 'timestamp': '2025-10-01 04:27:46.756888', 'step': 8367, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:46.810165', 'step': 8367, 'epoch': 2} {'type': 'loss', 'content': 0.10643124580383301, 'timestamp': '2025-10-01 04:27:46.815864', 'step': 8368, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:27:46.868878', 'step': 8368, 'epoch': 2} {'type': 'loss', 'content': 0.17023147642612457, 'timestamp': '2025-10-01 04:27:46.871101', 'step': 8369, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:46.924625', 'step': 8369, 'epoch': 2} {'type': 'loss', 'content': 0.12708088755607605, 'timestamp': '2025-10-01 04:27:46.941776', 'step': 8370, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:46.997039', 'step': 8370, 'epoch': 2} {'type': 'loss', 'content': 0.1577230989933014, 'timestamp': '2025-10-01 04:27:46.999119', 'step': 8371, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:47.051801', 'step': 8371, 'epoch': 2} {'type': 'loss', 'content': 0.15410251915454865, 'timestamp': '2025-10-01 04:27:47.057779', 'step': 8372, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:47.110540', 'step': 8372, 'epoch': 2} {'type': 'loss', 'content': 0.11971113830804825, 'timestamp': '2025-10-01 04:27:47.112786', 'step': 8373, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:47.165448', 'step': 8373, 'epoch': 2} {'type': 'loss', 'content': 0.07504034787416458, 'timestamp': '2025-10-01 04:27:47.167362', 'step': 8374, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-01 04:27:47.221144', 'step': 8374, 'epoch': 2} {'type': 'loss', 'content': 0.2420719563961029, 'timestamp': '2025-10-01 04:27:47.223243', 'step': 8375, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:47.276141', 'step': 8375, 'epoch': 2} {'type': 'loss', 'content': 0.12122341990470886, 'timestamp': '2025-10-01 04:27:47.282263', 'step': 8376, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:47.334876', 'step': 8376, 'epoch': 2} {'type': 'loss', 'content': 0.21521712839603424, 'timestamp': '2025-10-01 04:27:47.337333', 'step': 8377, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:47.390642', 'step': 8377, 'epoch': 2} {'type': 'loss', 'content': 0.1663672775030136, 'timestamp': '2025-10-01 04:27:47.392675', 'step': 8378, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:47.454091', 'step': 8378, 'epoch': 2} {'type': 'loss', 'content': 0.22811229526996613, 'timestamp': '2025-10-01 04:27:47.456324', 'step': 8379, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:47.509564', 'step': 8379, 'epoch': 2} {'type': 'loss', 'content': 0.14954352378845215, 'timestamp': '2025-10-01 04:27:47.515517', 'step': 8380, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:47.568908', 'step': 8380, 'epoch': 2} {'type': 'loss', 'content': 0.23542310297489166, 'timestamp': '2025-10-01 04:27:47.572066', 'step': 8381, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:47.625559', 'step': 8381, 'epoch': 2} {'type': 'loss', 'content': 0.1295764446258545, 'timestamp': '2025-10-01 04:27:47.627926', 'step': 8382, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:47.681808', 'step': 8382, 'epoch': 2} {'type': 'loss', 'content': 0.1473185420036316, 'timestamp': '2025-10-01 04:27:47.684424', 'step': 8383, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:47.751884', 'step': 8383, 'epoch': 2} {'type': 'loss', 'content': 0.20589442551136017, 'timestamp': '2025-10-01 04:27:47.760666', 'step': 8384, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:27:47.814048', 'step': 8384, 'epoch': 2} {'type': 'loss', 'content': 0.10361306369304657, 'timestamp': '2025-10-01 04:27:47.816292', 'step': 8385, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:27:47.869746', 'step': 8385, 'epoch': 2} {'type': 'loss', 'content': 0.2138814479112625, 'timestamp': '2025-10-01 04:27:47.871791', 'step': 8386, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:27:47.924972', 'step': 8386, 'epoch': 2} {'type': 'loss', 'content': 0.20946969091892242, 'timestamp': '2025-10-01 04:27:47.927046', 'step': 8387, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:27:47.979730', 'step': 8387, 'epoch': 2} {'type': 'loss', 'content': 0.12701059877872467, 'timestamp': '2025-10-01 04:27:47.985416', 'step': 8388, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-01 04:28:01.252984', 'step': 8388, 'epoch': 2} {'type': 'pplx', 'content': 10505.58178405043, 'timestamp': '2025-10-01 04:28:01.255968', 'step': 8388, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:28:01.309792', 'step': 8388, 'epoch': 2} {'type': 'loss', 'content': 0.1330886334180832, 'timestamp': '2025-10-01 04:28:01.311836', 'step': 8389, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:01.365832', 'step': 8389, 'epoch': 2} {'type': 'loss', 'content': 0.11445984244346619, 'timestamp': '2025-10-01 04:28:01.367994', 'step': 8390, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:01.421373', 'step': 8390, 'epoch': 2} {'type': 'loss', 'content': 0.10572444647550583, 'timestamp': '2025-10-01 04:28:01.423385', 'step': 8391, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:01.477140', 'step': 8391, 'epoch': 2} {'type': 'loss', 'content': 0.2383638322353363, 'timestamp': '2025-10-01 04:28:01.483211', 'step': 8392, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:01.536911', 'step': 8392, 'epoch': 2} {'type': 'loss', 'content': 0.08220302313566208, 'timestamp': '2025-10-01 04:28:01.539163', 'step': 8393, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:01.592744', 'step': 8393, 'epoch': 2} {'type': 'loss', 'content': 0.09731832146644592, 'timestamp': '2025-10-01 04:28:01.595058', 'step': 8394, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:01.648855', 'step': 8394, 'epoch': 2} {'type': 'loss', 'content': 0.15665872395038605, 'timestamp': '2025-10-01 04:28:01.650927', 'step': 8395, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:01.704863', 'step': 8395, 'epoch': 2} {'type': 'loss', 'content': 0.08410105854272842, 'timestamp': '2025-10-01 04:28:01.710429', 'step': 8396, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:01.763577', 'step': 8396, 'epoch': 2} {'type': 'loss', 'content': 0.17861655354499817, 'timestamp': '2025-10-01 04:28:01.765650', 'step': 8397, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:01.820591', 'step': 8397, 'epoch': 2} {'type': 'loss', 'content': 0.21626733243465424, 'timestamp': '2025-10-01 04:28:01.822539', 'step': 8398, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:28:01.875977', 'step': 8398, 'epoch': 2} {'type': 'loss', 'content': 0.1799154132604599, 'timestamp': '2025-10-01 04:28:01.878067', 'step': 8399, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:01.938008', 'step': 8399, 'epoch': 2} {'type': 'loss', 'content': 0.17529207468032837, 'timestamp': '2025-10-01 04:28:01.943955', 'step': 8400, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:01.996836', 'step': 8400, 'epoch': 2} {'type': 'loss', 'content': 0.14946143329143524, 'timestamp': '2025-10-01 04:28:01.999381', 'step': 8401, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:02.052430', 'step': 8401, 'epoch': 2} {'type': 'loss', 'content': 0.16502591967582703, 'timestamp': '2025-10-01 04:28:02.054784', 'step': 8402, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:02.108215', 'step': 8402, 'epoch': 2} {'type': 'loss', 'content': 0.16196908056735992, 'timestamp': '2025-10-01 04:28:02.110361', 'step': 8403, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:02.167087', 'step': 8403, 'epoch': 2} {'type': 'loss', 'content': 0.08134419471025467, 'timestamp': '2025-10-01 04:28:02.173108', 'step': 8404, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:02.226887', 'step': 8404, 'epoch': 2} {'type': 'loss', 'content': 0.1235034391283989, 'timestamp': '2025-10-01 04:28:02.228875', 'step': 8405, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:02.283159', 'step': 8405, 'epoch': 2} {'type': 'loss', 'content': 0.0836590901017189, 'timestamp': '2025-10-01 04:28:02.285937', 'step': 8406, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:02.339466', 'step': 8406, 'epoch': 2} {'type': 'loss', 'content': 0.14564819633960724, 'timestamp': '2025-10-01 04:28:02.358268', 'step': 8407, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:02.414477', 'step': 8407, 'epoch': 2} {'type': 'loss', 'content': 0.16409064829349518, 'timestamp': '2025-10-01 04:28:02.420678', 'step': 8408, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:28:02.490122', 'step': 8408, 'epoch': 2} {'type': 'loss', 'content': 0.17155200242996216, 'timestamp': '2025-10-01 04:28:02.493041', 'step': 8409, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:02.546786', 'step': 8409, 'epoch': 2} {'type': 'loss', 'content': 0.12713582813739777, 'timestamp': '2025-10-01 04:28:02.549237', 'step': 8410, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:02.610106', 'step': 8410, 'epoch': 2} {'type': 'loss', 'content': 0.1569758653640747, 'timestamp': '2025-10-01 04:28:02.612343', 'step': 8411, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:02.665530', 'step': 8411, 'epoch': 2} {'type': 'loss', 'content': 0.14270664751529694, 'timestamp': '2025-10-01 04:28:02.671517', 'step': 8412, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:02.724168', 'step': 8412, 'epoch': 2} {'type': 'loss', 'content': 0.11044630408287048, 'timestamp': '2025-10-01 04:28:02.726385', 'step': 8413, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:28:02.779439', 'step': 8413, 'epoch': 2} {'type': 'loss', 'content': 0.05886908248066902, 'timestamp': '2025-10-01 04:28:02.782489', 'step': 8414, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:02.837304', 'step': 8414, 'epoch': 2} {'type': 'loss', 'content': 0.10178828984498978, 'timestamp': '2025-10-01 04:28:02.839976', 'step': 8415, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:02.893579', 'step': 8415, 'epoch': 2} {'type': 'loss', 'content': 0.10735738277435303, 'timestamp': '2025-10-01 04:28:02.903533', 'step': 8416, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:02.956914', 'step': 8416, 'epoch': 2} {'type': 'loss', 'content': 0.17007994651794434, 'timestamp': '2025-10-01 04:28:02.962152', 'step': 8417, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:03.015218', 'step': 8417, 'epoch': 2} {'type': 'loss', 'content': 0.1395632028579712, 'timestamp': '2025-10-01 04:28:03.017502', 'step': 8418, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:03.071498', 'step': 8418, 'epoch': 2} {'type': 'loss', 'content': 0.20475803315639496, 'timestamp': '2025-10-01 04:28:03.074053', 'step': 8419, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:03.128253', 'step': 8419, 'epoch': 2} {'type': 'loss', 'content': 0.1486053764820099, 'timestamp': '2025-10-01 04:28:03.134315', 'step': 8420, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:03.187899', 'step': 8420, 'epoch': 2} {'type': 'loss', 'content': 0.09754547476768494, 'timestamp': '2025-10-01 04:28:03.190079', 'step': 8421, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:03.244550', 'step': 8421, 'epoch': 2} {'type': 'loss', 'content': 0.12190496176481247, 'timestamp': '2025-10-01 04:28:03.247440', 'step': 8422, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:03.302615', 'step': 8422, 'epoch': 2} {'type': 'loss', 'content': 0.14593136310577393, 'timestamp': '2025-10-01 04:28:03.304960', 'step': 8423, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:03.357519', 'step': 8423, 'epoch': 2} {'type': 'loss', 'content': 0.1184580996632576, 'timestamp': '2025-10-01 04:28:03.363374', 'step': 8424, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:28:03.420121', 'step': 8424, 'epoch': 2} {'type': 'loss', 'content': 0.13224837183952332, 'timestamp': '2025-10-01 04:28:03.422356', 'step': 8425, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:03.476183', 'step': 8425, 'epoch': 2} {'type': 'loss', 'content': 0.12235294282436371, 'timestamp': '2025-10-01 04:28:03.478279', 'step': 8426, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:28:03.532059', 'step': 8426, 'epoch': 2} {'type': 'loss', 'content': 0.11183124780654907, 'timestamp': '2025-10-01 04:28:03.534181', 'step': 8427, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:03.603022', 'step': 8427, 'epoch': 2} {'type': 'loss', 'content': 0.15543735027313232, 'timestamp': '2025-10-01 04:28:03.608854', 'step': 8428, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:03.661718', 'step': 8428, 'epoch': 2} {'type': 'loss', 'content': 0.10248742997646332, 'timestamp': '2025-10-01 04:28:03.664581', 'step': 8429, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:03.717618', 'step': 8429, 'epoch': 2} {'type': 'loss', 'content': 0.16248533129692078, 'timestamp': '2025-10-01 04:28:03.720096', 'step': 8430, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:03.773885', 'step': 8430, 'epoch': 2} {'type': 'loss', 'content': 0.1254580318927765, 'timestamp': '2025-10-01 04:28:03.776118', 'step': 8431, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:03.829009', 'step': 8431, 'epoch': 2} {'type': 'loss', 'content': 0.2042636126279831, 'timestamp': '2025-10-01 04:28:03.834859', 'step': 8432, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:03.887097', 'step': 8432, 'epoch': 2} {'type': 'loss', 'content': 0.14453525841236115, 'timestamp': '2025-10-01 04:28:03.889329', 'step': 8433, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:03.943177', 'step': 8433, 'epoch': 2} {'type': 'loss', 'content': 0.10101047903299332, 'timestamp': '2025-10-01 04:28:03.945568', 'step': 8434, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:03.999702', 'step': 8434, 'epoch': 2} {'type': 'loss', 'content': 0.177212655544281, 'timestamp': '2025-10-01 04:28:04.001907', 'step': 8435, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:28:04.054833', 'step': 8435, 'epoch': 2} {'type': 'loss', 'content': 0.17182208597660065, 'timestamp': '2025-10-01 04:28:04.075132', 'step': 8436, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:04.128043', 'step': 8436, 'epoch': 2} {'type': 'loss', 'content': 0.12836889922618866, 'timestamp': '2025-10-01 04:28:04.130860', 'step': 8437, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:04.183920', 'step': 8437, 'epoch': 2} {'type': 'loss', 'content': 0.16803522408008575, 'timestamp': '2025-10-01 04:28:04.186125', 'step': 8438, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:04.239784', 'step': 8438, 'epoch': 2} {'type': 'loss', 'content': 0.17091186344623566, 'timestamp': '2025-10-01 04:28:04.241814', 'step': 8439, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:04.294482', 'step': 8439, 'epoch': 2} {'type': 'loss', 'content': 0.14748844504356384, 'timestamp': '2025-10-01 04:28:04.304769', 'step': 8440, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:04.358997', 'step': 8440, 'epoch': 2} {'type': 'loss', 'content': 0.08788712322711945, 'timestamp': '2025-10-01 04:28:04.361048', 'step': 8441, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:04.414093', 'step': 8441, 'epoch': 2} {'type': 'loss', 'content': 0.11450343579053879, 'timestamp': '2025-10-01 04:28:04.416327', 'step': 8442, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:04.469786', 'step': 8442, 'epoch': 2} {'type': 'loss', 'content': 0.07016574591398239, 'timestamp': '2025-10-01 04:28:04.472140', 'step': 8443, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:04.525456', 'step': 8443, 'epoch': 2} {'type': 'loss', 'content': 0.20442187786102295, 'timestamp': '2025-10-01 04:28:04.531436', 'step': 8444, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:04.584614', 'step': 8444, 'epoch': 2} {'type': 'loss', 'content': 0.13611049950122833, 'timestamp': '2025-10-01 04:28:04.587146', 'step': 8445, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:04.641065', 'step': 8445, 'epoch': 2} {'type': 'loss', 'content': 0.06732577830553055, 'timestamp': '2025-10-01 04:28:04.643150', 'step': 8446, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:04.696724', 'step': 8446, 'epoch': 2} {'type': 'loss', 'content': 0.10614002496004105, 'timestamp': '2025-10-01 04:28:04.699586', 'step': 8447, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:04.752739', 'step': 8447, 'epoch': 2} {'type': 'loss', 'content': 0.16543275117874146, 'timestamp': '2025-10-01 04:28:04.759920', 'step': 8448, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:04.813645', 'step': 8448, 'epoch': 2} {'type': 'loss', 'content': 0.20044057071208954, 'timestamp': '2025-10-01 04:28:04.815877', 'step': 8449, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:04.882157', 'step': 8449, 'epoch': 2} {'type': 'loss', 'content': 0.12493077665567398, 'timestamp': '2025-10-01 04:28:04.887922', 'step': 8450, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:04.941611', 'step': 8450, 'epoch': 2} {'type': 'loss', 'content': 0.08723488450050354, 'timestamp': '2025-10-01 04:28:04.943848', 'step': 8451, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:04.996990', 'step': 8451, 'epoch': 2} {'type': 'loss', 'content': 0.16754671931266785, 'timestamp': '2025-10-01 04:28:05.002906', 'step': 8452, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:05.055350', 'step': 8452, 'epoch': 2} {'type': 'loss', 'content': 0.1281708925962448, 'timestamp': '2025-10-01 04:28:05.058562', 'step': 8453, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:28:05.114390', 'step': 8453, 'epoch': 2} {'type': 'loss', 'content': 0.181842640042305, 'timestamp': '2025-10-01 04:28:05.116689', 'step': 8454, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:05.170040', 'step': 8454, 'epoch': 2} {'type': 'loss', 'content': 0.13256484270095825, 'timestamp': '2025-10-01 04:28:05.172326', 'step': 8455, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:28:05.226660', 'step': 8455, 'epoch': 2} {'type': 'loss', 'content': 0.14549767971038818, 'timestamp': '2025-10-01 04:28:05.232402', 'step': 8456, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:05.295562', 'step': 8456, 'epoch': 2} {'type': 'loss', 'content': 0.11925866454839706, 'timestamp': '2025-10-01 04:28:05.297761', 'step': 8457, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:05.351720', 'step': 8457, 'epoch': 2} {'type': 'loss', 'content': 0.16425786912441254, 'timestamp': '2025-10-01 04:28:05.354038', 'step': 8458, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:05.407034', 'step': 8458, 'epoch': 2} {'type': 'loss', 'content': 0.15358801186084747, 'timestamp': '2025-10-01 04:28:05.409288', 'step': 8459, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:05.462029', 'step': 8459, 'epoch': 2} {'type': 'loss', 'content': 0.1074165627360344, 'timestamp': '2025-10-01 04:28:05.467808', 'step': 8460, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:05.520410', 'step': 8460, 'epoch': 2} {'type': 'loss', 'content': 0.13507647812366486, 'timestamp': '2025-10-01 04:28:05.522408', 'step': 8461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:05.575242', 'step': 8461, 'epoch': 2} {'type': 'loss', 'content': 0.17140483856201172, 'timestamp': '2025-10-01 04:28:05.577349', 'step': 8462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:05.630454', 'step': 8462, 'epoch': 2} {'type': 'loss', 'content': 0.23597434163093567, 'timestamp': '2025-10-01 04:28:05.632964', 'step': 8463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:05.685958', 'step': 8463, 'epoch': 2} {'type': 'loss', 'content': 0.09107711166143417, 'timestamp': '2025-10-01 04:28:05.691726', 'step': 8464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:05.756533', 'step': 8464, 'epoch': 2} {'type': 'loss', 'content': 0.12637823820114136, 'timestamp': '2025-10-01 04:28:05.758771', 'step': 8465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:05.812104', 'step': 8465, 'epoch': 2} {'type': 'loss', 'content': 0.1472872644662857, 'timestamp': '2025-10-01 04:28:05.814547', 'step': 8466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:05.867128', 'step': 8466, 'epoch': 2} {'type': 'loss', 'content': 0.16879235208034515, 'timestamp': '2025-10-01 04:28:05.869370', 'step': 8467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:05.922521', 'step': 8467, 'epoch': 2} {'type': 'loss', 'content': 0.09272913634777069, 'timestamp': '2025-10-01 04:28:05.937568', 'step': 8468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:05.989661', 'step': 8468, 'epoch': 2} {'type': 'loss', 'content': 0.1510692685842514, 'timestamp': '2025-10-01 04:28:05.991968', 'step': 8469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:06.045190', 'step': 8469, 'epoch': 2} {'type': 'loss', 'content': 0.08939304202795029, 'timestamp': '2025-10-01 04:28:06.055746', 'step': 8470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:28:06.108824', 'step': 8470, 'epoch': 2} {'type': 'loss', 'content': 0.22648708522319794, 'timestamp': '2025-10-01 04:28:06.111078', 'step': 8471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:06.164165', 'step': 8471, 'epoch': 2} {'type': 'loss', 'content': 0.17020359635353088, 'timestamp': '2025-10-01 04:28:06.180559', 'step': 8472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:28:06.235430', 'step': 8472, 'epoch': 2} {'type': 'loss', 'content': 0.1878795325756073, 'timestamp': '2025-10-01 04:28:06.237534', 'step': 8473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:06.292063', 'step': 8473, 'epoch': 2} {'type': 'loss', 'content': 0.1052500531077385, 'timestamp': '2025-10-01 04:28:06.294663', 'step': 8474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:06.354100', 'step': 8474, 'epoch': 2} {'type': 'loss', 'content': 0.06352417916059494, 'timestamp': '2025-10-01 04:28:06.356413', 'step': 8475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:06.410550', 'step': 8475, 'epoch': 2} {'type': 'loss', 'content': 0.20773717761039734, 'timestamp': '2025-10-01 04:28:06.416754', 'step': 8476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:06.470313', 'step': 8476, 'epoch': 2} {'type': 'loss', 'content': 0.10389997065067291, 'timestamp': '2025-10-01 04:28:06.472533', 'step': 8477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:06.526884', 'step': 8477, 'epoch': 2} {'type': 'loss', 'content': 0.10786311328411102, 'timestamp': '2025-10-01 04:28:06.529399', 'step': 8478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:06.583345', 'step': 8478, 'epoch': 2} {'type': 'loss', 'content': 0.0848887711763382, 'timestamp': '2025-10-01 04:28:06.585919', 'step': 8479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:06.639637', 'step': 8479, 'epoch': 2} {'type': 'loss', 'content': 0.15546606481075287, 'timestamp': '2025-10-01 04:28:06.645354', 'step': 8480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:06.703450', 'step': 8480, 'epoch': 2} {'type': 'loss', 'content': 0.11891867220401764, 'timestamp': '2025-10-01 04:28:06.705913', 'step': 8481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:06.760946', 'step': 8481, 'epoch': 2} {'type': 'loss', 'content': 0.24769818782806396, 'timestamp': '2025-10-01 04:28:06.763200', 'step': 8482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:06.818497', 'step': 8482, 'epoch': 2} {'type': 'loss', 'content': 0.16786746680736542, 'timestamp': '2025-10-01 04:28:06.821039', 'step': 8483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:06.874815', 'step': 8483, 'epoch': 2} {'type': 'loss', 'content': 0.11455489695072174, 'timestamp': '2025-10-01 04:28:06.889462', 'step': 8484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:06.943385', 'step': 8484, 'epoch': 2} {'type': 'loss', 'content': 0.10688651353120804, 'timestamp': '2025-10-01 04:28:06.956084', 'step': 8485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:07.011137', 'step': 8485, 'epoch': 2} {'type': 'loss', 'content': 0.16298295557498932, 'timestamp': '2025-10-01 04:28:07.013569', 'step': 8486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:07.067531', 'step': 8486, 'epoch': 2} {'type': 'loss', 'content': 0.11403485387563705, 'timestamp': '2025-10-01 04:28:07.070056', 'step': 8487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:07.124613', 'step': 8487, 'epoch': 2} {'type': 'loss', 'content': 0.2595160901546478, 'timestamp': '2025-10-01 04:28:07.130695', 'step': 8488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:07.184717', 'step': 8488, 'epoch': 2} {'type': 'loss', 'content': 0.17846010625362396, 'timestamp': '2025-10-01 04:28:07.186915', 'step': 8489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:28:07.241205', 'step': 8489, 'epoch': 2} {'type': 'loss', 'content': 0.11492903530597687, 'timestamp': '2025-10-01 04:28:07.256290', 'step': 8490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:07.310304', 'step': 8490, 'epoch': 2} {'type': 'loss', 'content': 0.26681941747665405, 'timestamp': '2025-10-01 04:28:07.313045', 'step': 8491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:07.374485', 'step': 8491, 'epoch': 2} {'type': 'loss', 'content': 0.12312956154346466, 'timestamp': '2025-10-01 04:28:07.380907', 'step': 8492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:07.434278', 'step': 8492, 'epoch': 2} {'type': 'loss', 'content': 0.2556532025337219, 'timestamp': '2025-10-01 04:28:07.437429', 'step': 8493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:07.493207', 'step': 8493, 'epoch': 2} {'type': 'loss', 'content': 0.14142759144306183, 'timestamp': '2025-10-01 04:28:07.496089', 'step': 8494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:07.549213', 'step': 8494, 'epoch': 2} {'type': 'loss', 'content': 0.2411680668592453, 'timestamp': '2025-10-01 04:28:07.554919', 'step': 8495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:07.609073', 'step': 8495, 'epoch': 2} {'type': 'loss', 'content': 0.1595170646905899, 'timestamp': '2025-10-01 04:28:07.615383', 'step': 8496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:07.670437', 'step': 8496, 'epoch': 2} {'type': 'loss', 'content': 0.14379163086414337, 'timestamp': '2025-10-01 04:28:07.673066', 'step': 8497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:07.727580', 'step': 8497, 'epoch': 2} {'type': 'loss', 'content': 0.10528717190027237, 'timestamp': '2025-10-01 04:28:07.729709', 'step': 8498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:07.784205', 'step': 8498, 'epoch': 2} {'type': 'loss', 'content': 0.10531741380691528, 'timestamp': '2025-10-01 04:28:07.792830', 'step': 8499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:28:07.847402', 'step': 8499, 'epoch': 2} {'type': 'loss', 'content': 0.1428324282169342, 'timestamp': '2025-10-01 04:28:07.854023', 'step': 8500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 8500', 'timestamp': '2025-10-01 04:28:08.237415', 'step': 8500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:08.301786', 'step': 8500, 'epoch': 2} {'type': 'loss', 'content': 0.2155051976442337, 'timestamp': '2025-10-01 04:28:08.303935', 'step': 8501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:08.358676', 'step': 8501, 'epoch': 2} {'type': 'loss', 'content': 0.2293073832988739, 'timestamp': '2025-10-01 04:28:08.360941', 'step': 8502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:28:08.414670', 'step': 8502, 'epoch': 2} {'type': 'loss', 'content': 0.1173563003540039, 'timestamp': '2025-10-01 04:28:08.417354', 'step': 8503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:08.471379', 'step': 8503, 'epoch': 2} {'type': 'loss', 'content': 0.19025129079818726, 'timestamp': '2025-10-01 04:28:08.477777', 'step': 8504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:08.540942', 'step': 8504, 'epoch': 2} {'type': 'loss', 'content': 0.15580040216445923, 'timestamp': '2025-10-01 04:28:08.544092', 'step': 8505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:08.599483', 'step': 8505, 'epoch': 2} {'type': 'loss', 'content': 0.07738779485225677, 'timestamp': '2025-10-01 04:28:08.601814', 'step': 8506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:08.655446', 'step': 8506, 'epoch': 2} {'type': 'loss', 'content': 0.16427749395370483, 'timestamp': '2025-10-01 04:28:08.659370', 'step': 8507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:08.714253', 'step': 8507, 'epoch': 2} {'type': 'loss', 'content': 0.16349340975284576, 'timestamp': '2025-10-01 04:28:08.720652', 'step': 8508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:08.774816', 'step': 8508, 'epoch': 2} {'type': 'loss', 'content': 0.1266460120677948, 'timestamp': '2025-10-01 04:28:08.783744', 'step': 8509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:08.844768', 'step': 8509, 'epoch': 2} {'type': 'loss', 'content': 0.07925976812839508, 'timestamp': '2025-10-01 04:28:08.846684', 'step': 8510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:08.900577', 'step': 8510, 'epoch': 2} {'type': 'loss', 'content': 0.09198299050331116, 'timestamp': '2025-10-01 04:28:08.910932', 'step': 8511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:08.966587', 'step': 8511, 'epoch': 2} {'type': 'loss', 'content': 0.10435954481363297, 'timestamp': '2025-10-01 04:28:08.972751', 'step': 8512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:09.028413', 'step': 8512, 'epoch': 2} {'type': 'loss', 'content': 0.12719444930553436, 'timestamp': '2025-10-01 04:28:09.030484', 'step': 8513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:09.086520', 'step': 8513, 'epoch': 2} {'type': 'loss', 'content': 0.12085718661546707, 'timestamp': '2025-10-01 04:28:09.088507', 'step': 8514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:09.142251', 'step': 8514, 'epoch': 2} {'type': 'loss', 'content': 0.09377887845039368, 'timestamp': '2025-10-01 04:28:09.144351', 'step': 8515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:28:09.198313', 'step': 8515, 'epoch': 2} {'type': 'loss', 'content': 0.20856323838233948, 'timestamp': '2025-10-01 04:28:09.204453', 'step': 8516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:09.257234', 'step': 8516, 'epoch': 2} {'type': 'loss', 'content': 0.21698474884033203, 'timestamp': '2025-10-01 04:28:09.259424', 'step': 8517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:09.312758', 'step': 8517, 'epoch': 2} {'type': 'loss', 'content': 0.08813368529081345, 'timestamp': '2025-10-01 04:28:09.314781', 'step': 8518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:09.368020', 'step': 8518, 'epoch': 2} {'type': 'loss', 'content': 0.105741947889328, 'timestamp': '2025-10-01 04:28:09.370847', 'step': 8519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:09.423785', 'step': 8519, 'epoch': 2} {'type': 'loss', 'content': 0.1941041797399521, 'timestamp': '2025-10-01 04:28:09.429847', 'step': 8520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:09.482727', 'step': 8520, 'epoch': 2} {'type': 'loss', 'content': 0.2764931619167328, 'timestamp': '2025-10-01 04:28:09.484868', 'step': 8521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:09.538545', 'step': 8521, 'epoch': 2} {'type': 'loss', 'content': 0.09153196215629578, 'timestamp': '2025-10-01 04:28:09.540581', 'step': 8522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:09.595343', 'step': 8522, 'epoch': 2} {'type': 'loss', 'content': 0.15075360238552094, 'timestamp': '2025-10-01 04:28:09.597569', 'step': 8523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:09.651684', 'step': 8523, 'epoch': 2} {'type': 'loss', 'content': 0.15196149051189423, 'timestamp': '2025-10-01 04:28:09.657428', 'step': 8524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:09.710107', 'step': 8524, 'epoch': 2} {'type': 'loss', 'content': 0.155207559466362, 'timestamp': '2025-10-01 04:28:09.712060', 'step': 8525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:09.765270', 'step': 8525, 'epoch': 2} {'type': 'loss', 'content': 0.1499795764684677, 'timestamp': '2025-10-01 04:28:09.767386', 'step': 8526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:28:09.832627', 'step': 8526, 'epoch': 2} {'type': 'loss', 'content': 0.1520908921957016, 'timestamp': '2025-10-01 04:28:09.834720', 'step': 8527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:09.888646', 'step': 8527, 'epoch': 2} {'type': 'loss', 'content': 0.16386361420154572, 'timestamp': '2025-10-01 04:28:09.894407', 'step': 8528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:09.947426', 'step': 8528, 'epoch': 2} {'type': 'loss', 'content': 0.12998920679092407, 'timestamp': '2025-10-01 04:28:09.949883', 'step': 8529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:10.017996', 'step': 8529, 'epoch': 2} {'type': 'loss', 'content': 0.1439048945903778, 'timestamp': '2025-10-01 04:28:10.041027', 'step': 8530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:10.095760', 'step': 8530, 'epoch': 2} {'type': 'loss', 'content': 0.15000581741333008, 'timestamp': '2025-10-01 04:28:10.098007', 'step': 8531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:10.151314', 'step': 8531, 'epoch': 2} {'type': 'loss', 'content': 0.1520228087902069, 'timestamp': '2025-10-01 04:28:10.157074', 'step': 8532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:10.210345', 'step': 8532, 'epoch': 2} {'type': 'loss', 'content': 0.14662711322307587, 'timestamp': '2025-10-01 04:28:10.212342', 'step': 8533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:10.266088', 'step': 8533, 'epoch': 2} {'type': 'loss', 'content': 0.15013931691646576, 'timestamp': '2025-10-01 04:28:10.268077', 'step': 8534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:10.321311', 'step': 8534, 'epoch': 2} {'type': 'loss', 'content': 0.14654575288295746, 'timestamp': '2025-10-01 04:28:10.323422', 'step': 8535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:10.382524', 'step': 8535, 'epoch': 2} {'type': 'loss', 'content': 0.10101858526468277, 'timestamp': '2025-10-01 04:28:10.388253', 'step': 8536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:10.449164', 'step': 8536, 'epoch': 2} {'type': 'loss', 'content': 0.19166120886802673, 'timestamp': '2025-10-01 04:28:10.454467', 'step': 8537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:10.513267', 'step': 8537, 'epoch': 2} {'type': 'loss', 'content': 0.09452646225690842, 'timestamp': '2025-10-01 04:28:10.515561', 'step': 8538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:10.569133', 'step': 8538, 'epoch': 2} {'type': 'loss', 'content': 0.0680328831076622, 'timestamp': '2025-10-01 04:28:10.571434', 'step': 8539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:10.625098', 'step': 8539, 'epoch': 2} {'type': 'loss', 'content': 0.23484617471694946, 'timestamp': '2025-10-01 04:28:10.630791', 'step': 8540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:28:10.683684', 'step': 8540, 'epoch': 2} {'type': 'loss', 'content': 0.0892752930521965, 'timestamp': '2025-10-01 04:28:10.685841', 'step': 8541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:10.746320', 'step': 8541, 'epoch': 2} {'type': 'loss', 'content': 0.05846939980983734, 'timestamp': '2025-10-01 04:28:10.757840', 'step': 8542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:10.811002', 'step': 8542, 'epoch': 2} {'type': 'loss', 'content': 0.2332429587841034, 'timestamp': '2025-10-01 04:28:10.813149', 'step': 8543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:10.866596', 'step': 8543, 'epoch': 2} {'type': 'loss', 'content': 0.07076214253902435, 'timestamp': '2025-10-01 04:28:10.873010', 'step': 8544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:10.926217', 'step': 8544, 'epoch': 2} {'type': 'loss', 'content': 0.16247022151947021, 'timestamp': '2025-10-01 04:28:10.928371', 'step': 8545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:28:10.992701', 'step': 8545, 'epoch': 2} {'type': 'loss', 'content': 0.10966707020998001, 'timestamp': '2025-10-01 04:28:10.994873', 'step': 8546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:11.047923', 'step': 8546, 'epoch': 2} {'type': 'loss', 'content': 0.14991462230682373, 'timestamp': '2025-10-01 04:28:11.049949', 'step': 8547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:11.102898', 'step': 8547, 'epoch': 2} {'type': 'loss', 'content': 0.13264866173267365, 'timestamp': '2025-10-01 04:28:11.108700', 'step': 8548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:28:11.164374', 'step': 8548, 'epoch': 2} {'type': 'loss', 'content': 0.08330085873603821, 'timestamp': '2025-10-01 04:28:11.166783', 'step': 8549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:11.229122', 'step': 8549, 'epoch': 2} {'type': 'loss', 'content': 0.12568022310733795, 'timestamp': '2025-10-01 04:28:11.233337', 'step': 8550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:11.289955', 'step': 8550, 'epoch': 2} {'type': 'loss', 'content': 0.11629899591207504, 'timestamp': '2025-10-01 04:28:11.302856', 'step': 8551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:28:11.370932', 'step': 8551, 'epoch': 2} {'type': 'loss', 'content': 0.11412769556045532, 'timestamp': '2025-10-01 04:28:11.384548', 'step': 8552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:11.447223', 'step': 8552, 'epoch': 2} {'type': 'loss', 'content': 0.15706254541873932, 'timestamp': '2025-10-01 04:28:11.449723', 'step': 8553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:11.506551', 'step': 8553, 'epoch': 2} {'type': 'loss', 'content': 0.11042912304401398, 'timestamp': '2025-10-01 04:28:11.508816', 'step': 8554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:11.562933', 'step': 8554, 'epoch': 2} {'type': 'loss', 'content': 0.16836637258529663, 'timestamp': '2025-10-01 04:28:11.565151', 'step': 8555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:11.618833', 'step': 8555, 'epoch': 2} {'type': 'loss', 'content': 0.12208746373653412, 'timestamp': '2025-10-01 04:28:11.624844', 'step': 8556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:28:11.678883', 'step': 8556, 'epoch': 2} {'type': 'loss', 'content': 0.1809174120426178, 'timestamp': '2025-10-01 04:28:11.681467', 'step': 8557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:28:11.741523', 'step': 8557, 'epoch': 2} {'type': 'loss', 'content': 0.17751675844192505, 'timestamp': '2025-10-01 04:28:11.744094', 'step': 8558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:11.799309', 'step': 8558, 'epoch': 2} {'type': 'loss', 'content': 0.1383930742740631, 'timestamp': '2025-10-01 04:28:11.801633', 'step': 8559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:11.856180', 'step': 8559, 'epoch': 2} {'type': 'loss', 'content': 0.10916658490896225, 'timestamp': '2025-10-01 04:28:11.862002', 'step': 8560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:28:11.914809', 'step': 8560, 'epoch': 2} {'type': 'loss', 'content': 0.09513804316520691, 'timestamp': '2025-10-01 04:28:11.917107', 'step': 8561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:11.970469', 'step': 8561, 'epoch': 2} {'type': 'loss', 'content': 0.09829559177160263, 'timestamp': '2025-10-01 04:28:11.972596', 'step': 8562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:28:12.026587', 'step': 8562, 'epoch': 2} {'type': 'loss', 'content': 0.16721586883068085, 'timestamp': '2025-10-01 04:28:12.029367', 'step': 8563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:12.083476', 'step': 8563, 'epoch': 2} {'type': 'loss', 'content': 0.09899266064167023, 'timestamp': '2025-10-01 04:28:12.090177', 'step': 8564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:12.154547', 'step': 8564, 'epoch': 2} {'type': 'loss', 'content': 0.18644945323467255, 'timestamp': '2025-10-01 04:28:12.157052', 'step': 8565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:12.212268', 'step': 8565, 'epoch': 2} {'type': 'loss', 'content': 0.12958772480487823, 'timestamp': '2025-10-01 04:28:12.215268', 'step': 8566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:12.270864', 'step': 8566, 'epoch': 2} {'type': 'loss', 'content': 0.14401037991046906, 'timestamp': '2025-10-01 04:28:12.273077', 'step': 8567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:28:12.326811', 'step': 8567, 'epoch': 2} {'type': 'loss', 'content': 0.18410080671310425, 'timestamp': '2025-10-01 04:28:12.333074', 'step': 8568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:12.386664', 'step': 8568, 'epoch': 2} {'type': 'loss', 'content': 0.11807306110858917, 'timestamp': '2025-10-01 04:28:12.395795', 'step': 8569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:12.449741', 'step': 8569, 'epoch': 2} {'type': 'loss', 'content': 0.13991837203502655, 'timestamp': '2025-10-01 04:28:12.451951', 'step': 8570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:28:12.512658', 'step': 8570, 'epoch': 2} {'type': 'loss', 'content': 0.14368556439876556, 'timestamp': '2025-10-01 04:28:12.514790', 'step': 8571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:28:12.568638', 'step': 8571, 'epoch': 2} {'type': 'loss', 'content': 0.16698603332042694, 'timestamp': '2025-10-01 04:28:12.575446', 'step': 8572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:12.634582', 'step': 8572, 'epoch': 2} {'type': 'loss', 'content': 0.18032920360565186, 'timestamp': '2025-10-01 04:28:12.636824', 'step': 8573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:12.690629', 'step': 8573, 'epoch': 2} {'type': 'loss', 'content': 0.14179177582263947, 'timestamp': '2025-10-01 04:28:12.692806', 'step': 8574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:28:12.748310', 'step': 8574, 'epoch': 2} {'type': 'loss', 'content': 0.08601372689008713, 'timestamp': '2025-10-01 04:28:12.750945', 'step': 8575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:12.810533', 'step': 8575, 'epoch': 2} {'type': 'loss', 'content': 0.10145991295576096, 'timestamp': '2025-10-01 04:28:12.816443', 'step': 8576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:12.869744', 'step': 8576, 'epoch': 2} {'type': 'loss', 'content': 0.10271447151899338, 'timestamp': '2025-10-01 04:28:12.871874', 'step': 8577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:12.926212', 'step': 8577, 'epoch': 2} {'type': 'loss', 'content': 0.1997772753238678, 'timestamp': '2025-10-01 04:28:12.928453', 'step': 8578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:12.983288', 'step': 8578, 'epoch': 2} {'type': 'loss', 'content': 0.17974333465099335, 'timestamp': '2025-10-01 04:28:12.985507', 'step': 8579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:13.050423', 'step': 8579, 'epoch': 2} {'type': 'loss', 'content': 0.08754824846982956, 'timestamp': '2025-10-01 04:28:13.056725', 'step': 8580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:13.128554', 'step': 8580, 'epoch': 2} {'type': 'loss', 'content': 0.19079472124576569, 'timestamp': '2025-10-01 04:28:13.131092', 'step': 8581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:13.187152', 'step': 8581, 'epoch': 2} {'type': 'loss', 'content': 0.15313902497291565, 'timestamp': '2025-10-01 04:28:13.189429', 'step': 8582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:13.245685', 'step': 8582, 'epoch': 2} {'type': 'loss', 'content': 0.11197875440120697, 'timestamp': '2025-10-01 04:28:13.247823', 'step': 8583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:13.305958', 'step': 8583, 'epoch': 2} {'type': 'loss', 'content': 0.18602252006530762, 'timestamp': '2025-10-01 04:28:13.313093', 'step': 8584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:13.368624', 'step': 8584, 'epoch': 2} {'type': 'loss', 'content': 0.10698417574167252, 'timestamp': '2025-10-01 04:28:13.370933', 'step': 8585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:13.427203', 'step': 8585, 'epoch': 2} {'type': 'loss', 'content': 0.12582182884216309, 'timestamp': '2025-10-01 04:28:13.429317', 'step': 8586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:28:13.494715', 'step': 8586, 'epoch': 2} {'type': 'loss', 'content': 0.13377007842063904, 'timestamp': '2025-10-01 04:28:13.496616', 'step': 8587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:13.552264', 'step': 8587, 'epoch': 2} {'type': 'loss', 'content': 0.1524733603000641, 'timestamp': '2025-10-01 04:28:13.558404', 'step': 8588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:13.623405', 'step': 8588, 'epoch': 2} {'type': 'loss', 'content': 0.10884054750204086, 'timestamp': '2025-10-01 04:28:13.625507', 'step': 8589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:13.679507', 'step': 8589, 'epoch': 2} {'type': 'loss', 'content': 0.15744054317474365, 'timestamp': '2025-10-01 04:28:13.681442', 'step': 8590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:28:13.735138', 'step': 8590, 'epoch': 2} {'type': 'loss', 'content': 0.20633341372013092, 'timestamp': '2025-10-01 04:28:13.737356', 'step': 8591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:13.791737', 'step': 8591, 'epoch': 2} {'type': 'loss', 'content': 0.1181730180978775, 'timestamp': '2025-10-01 04:28:13.797813', 'step': 8592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:13.850985', 'step': 8592, 'epoch': 2} {'type': 'loss', 'content': 0.21214815974235535, 'timestamp': '2025-10-01 04:28:13.852919', 'step': 8593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:13.910104', 'step': 8593, 'epoch': 2} {'type': 'loss', 'content': 0.10293447226285934, 'timestamp': '2025-10-01 04:28:13.911776', 'step': 8594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:28:13.965723', 'step': 8594, 'epoch': 2} {'type': 'loss', 'content': 0.1771668791770935, 'timestamp': '2025-10-01 04:28:13.969002', 'step': 8595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:14.034264', 'step': 8595, 'epoch': 2} {'type': 'loss', 'content': 0.24978314340114594, 'timestamp': '2025-10-01 04:28:14.054702', 'step': 8596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:14.108257', 'step': 8596, 'epoch': 2} {'type': 'loss', 'content': 0.09762217849493027, 'timestamp': '2025-10-01 04:28:14.115989', 'step': 8597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:14.169183', 'step': 8597, 'epoch': 2} {'type': 'loss', 'content': 0.12735547125339508, 'timestamp': '2025-10-01 04:28:14.171405', 'step': 8598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:28:14.225112', 'step': 8598, 'epoch': 2} {'type': 'loss', 'content': 0.14936983585357666, 'timestamp': '2025-10-01 04:28:14.226790', 'step': 8599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:14.294880', 'step': 8599, 'epoch': 2} {'type': 'loss', 'content': 0.05783787742257118, 'timestamp': '2025-10-01 04:28:14.301106', 'step': 8600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:14.354304', 'step': 8600, 'epoch': 2} {'type': 'loss', 'content': 0.09823109209537506, 'timestamp': '2025-10-01 04:28:14.356446', 'step': 8601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:14.411251', 'step': 8601, 'epoch': 2} {'type': 'loss', 'content': 0.16821765899658203, 'timestamp': '2025-10-01 04:28:14.413653', 'step': 8602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:28:14.468225', 'step': 8602, 'epoch': 2} {'type': 'loss', 'content': 0.09768466651439667, 'timestamp': '2025-10-01 04:28:14.470405', 'step': 8603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:14.525336', 'step': 8603, 'epoch': 2} {'type': 'loss', 'content': 0.15750986337661743, 'timestamp': '2025-10-01 04:28:14.531914', 'step': 8604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:28:14.585720', 'step': 8604, 'epoch': 2} {'type': 'loss', 'content': 0.15696260333061218, 'timestamp': '2025-10-01 04:28:14.588124', 'step': 8605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:14.641914', 'step': 8605, 'epoch': 2} {'type': 'loss', 'content': 0.0832740068435669, 'timestamp': '2025-10-01 04:28:14.644240', 'step': 8606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:14.700306', 'step': 8606, 'epoch': 2} {'type': 'loss', 'content': 0.1395808309316635, 'timestamp': '2025-10-01 04:28:14.702293', 'step': 8607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:14.763119', 'step': 8607, 'epoch': 2} {'type': 'loss', 'content': 0.1634765863418579, 'timestamp': '2025-10-01 04:28:14.770142', 'step': 8608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:14.846202', 'step': 8608, 'epoch': 2} {'type': 'loss', 'content': 0.20573662221431732, 'timestamp': '2025-10-01 04:28:14.854987', 'step': 8609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:14.928693', 'step': 8609, 'epoch': 2} {'type': 'loss', 'content': 0.11387239396572113, 'timestamp': '2025-10-01 04:28:14.933751', 'step': 8610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:14.998770', 'step': 8610, 'epoch': 2} {'type': 'loss', 'content': 0.09260164946317673, 'timestamp': '2025-10-01 04:28:15.001262', 'step': 8611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:15.063344', 'step': 8611, 'epoch': 2} {'type': 'loss', 'content': 0.09468066692352295, 'timestamp': '2025-10-01 04:28:15.075350', 'step': 8612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:15.132416', 'step': 8612, 'epoch': 2} {'type': 'loss', 'content': 0.11351853609085083, 'timestamp': '2025-10-01 04:28:15.134853', 'step': 8613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:15.190341', 'step': 8613, 'epoch': 2} {'type': 'loss', 'content': 0.10520109534263611, 'timestamp': '2025-10-01 04:28:15.192687', 'step': 8614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:15.251827', 'step': 8614, 'epoch': 2} {'type': 'loss', 'content': 0.11122120916843414, 'timestamp': '2025-10-01 04:28:15.254004', 'step': 8615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-01 04:28:15.328832', 'step': 8615, 'epoch': 2} {'type': 'loss', 'content': 0.17745104432106018, 'timestamp': '2025-10-01 04:28:15.335095', 'step': 8616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:28:15.391485', 'step': 8616, 'epoch': 2} {'type': 'loss', 'content': 0.17808212339878082, 'timestamp': '2025-10-01 04:28:15.394017', 'step': 8617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:15.448567', 'step': 8617, 'epoch': 2} {'type': 'loss', 'content': 0.1623820662498474, 'timestamp': '2025-10-01 04:28:15.450660', 'step': 8618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:15.505339', 'step': 8618, 'epoch': 2} {'type': 'loss', 'content': 0.07283367961645126, 'timestamp': '2025-10-01 04:28:15.507249', 'step': 8619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:15.561392', 'step': 8619, 'epoch': 2} {'type': 'loss', 'content': 0.14945833384990692, 'timestamp': '2025-10-01 04:28:15.572627', 'step': 8620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:15.630327', 'step': 8620, 'epoch': 2} {'type': 'loss', 'content': 0.1388503611087799, 'timestamp': '2025-10-01 04:28:15.632553', 'step': 8621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:28:15.693964', 'step': 8621, 'epoch': 2} {'type': 'loss', 'content': 0.19245705008506775, 'timestamp': '2025-10-01 04:28:15.696891', 'step': 8622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:15.751138', 'step': 8622, 'epoch': 2} {'type': 'loss', 'content': 0.11983586847782135, 'timestamp': '2025-10-01 04:28:15.753937', 'step': 8623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:28:15.810528', 'step': 8623, 'epoch': 2} {'type': 'loss', 'content': 0.0661085769534111, 'timestamp': '2025-10-01 04:28:15.817052', 'step': 8624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:15.871410', 'step': 8624, 'epoch': 2} {'type': 'loss', 'content': 0.10351218283176422, 'timestamp': '2025-10-01 04:28:15.873957', 'step': 8625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:15.929129', 'step': 8625, 'epoch': 2} {'type': 'loss', 'content': 0.11313804239034653, 'timestamp': '2025-10-01 04:28:15.930906', 'step': 8626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:15.985423', 'step': 8626, 'epoch': 2} {'type': 'loss', 'content': 0.1664067953824997, 'timestamp': '2025-10-01 04:28:15.987650', 'step': 8627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:16.042997', 'step': 8627, 'epoch': 2} {'type': 'loss', 'content': 0.11569627374410629, 'timestamp': '2025-10-01 04:28:16.049104', 'step': 8628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:16.133994', 'step': 8628, 'epoch': 2} {'type': 'loss', 'content': 0.09379206597805023, 'timestamp': '2025-10-01 04:28:16.136249', 'step': 8629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:16.190681', 'step': 8629, 'epoch': 2} {'type': 'loss', 'content': 0.1681254357099533, 'timestamp': '2025-10-01 04:28:16.197459', 'step': 8630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:16.255397', 'step': 8630, 'epoch': 2} {'type': 'loss', 'content': 0.1286582499742508, 'timestamp': '2025-10-01 04:28:16.257468', 'step': 8631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:16.310634', 'step': 8631, 'epoch': 2} {'type': 'loss', 'content': 0.15164631605148315, 'timestamp': '2025-10-01 04:28:16.316678', 'step': 8632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:16.369863', 'step': 8632, 'epoch': 2} {'type': 'loss', 'content': 0.1036735400557518, 'timestamp': '2025-10-01 04:28:16.372186', 'step': 8633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:28:16.438415', 'step': 8633, 'epoch': 2} {'type': 'loss', 'content': 0.1490021049976349, 'timestamp': '2025-10-01 04:28:16.440605', 'step': 8634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:16.494980', 'step': 8634, 'epoch': 2} {'type': 'loss', 'content': 0.12119102478027344, 'timestamp': '2025-10-01 04:28:16.497073', 'step': 8635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:16.550318', 'step': 8635, 'epoch': 2} {'type': 'loss', 'content': 0.18741370737552643, 'timestamp': '2025-10-01 04:28:16.556616', 'step': 8636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:16.610493', 'step': 8636, 'epoch': 2} {'type': 'loss', 'content': 0.10749465227127075, 'timestamp': '2025-10-01 04:28:16.612466', 'step': 8637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:16.665476', 'step': 8637, 'epoch': 2} {'type': 'loss', 'content': 0.14832153916358948, 'timestamp': '2025-10-01 04:28:16.667220', 'step': 8638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:16.721247', 'step': 8638, 'epoch': 2} {'type': 'loss', 'content': 0.10864027589559555, 'timestamp': '2025-10-01 04:28:16.723235', 'step': 8639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:16.777381', 'step': 8639, 'epoch': 2} {'type': 'loss', 'content': 0.1826373189687729, 'timestamp': '2025-10-01 04:28:16.782578', 'step': 8640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:16.835722', 'step': 8640, 'epoch': 2} {'type': 'loss', 'content': 0.13093335926532745, 'timestamp': '2025-10-01 04:28:16.852274', 'step': 8641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:16.918147', 'step': 8641, 'epoch': 2} {'type': 'loss', 'content': 0.13112235069274902, 'timestamp': '2025-10-01 04:28:16.927154', 'step': 8642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:16.980755', 'step': 8642, 'epoch': 2} {'type': 'loss', 'content': 0.16761092841625214, 'timestamp': '2025-10-01 04:28:16.982929', 'step': 8643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:28:17.039446', 'step': 8643, 'epoch': 2} {'type': 'loss', 'content': 0.09236383438110352, 'timestamp': '2025-10-01 04:28:17.045247', 'step': 8644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:28:17.111312', 'step': 8644, 'epoch': 2} {'type': 'loss', 'content': 0.05741351097822189, 'timestamp': '2025-10-01 04:28:17.113132', 'step': 8645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:17.179989', 'step': 8645, 'epoch': 2} {'type': 'loss', 'content': 0.11643845587968826, 'timestamp': '2025-10-01 04:28:17.182492', 'step': 8646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:17.235749', 'step': 8646, 'epoch': 2} {'type': 'loss', 'content': 0.1918141394853592, 'timestamp': '2025-10-01 04:28:17.238069', 'step': 8647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:17.291724', 'step': 8647, 'epoch': 2} {'type': 'loss', 'content': 0.18343660235404968, 'timestamp': '2025-10-01 04:28:17.297695', 'step': 8648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:17.351186', 'step': 8648, 'epoch': 2} {'type': 'loss', 'content': 0.07347822934389114, 'timestamp': '2025-10-01 04:28:17.353460', 'step': 8649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:17.406658', 'step': 8649, 'epoch': 2} {'type': 'loss', 'content': 0.12852586805820465, 'timestamp': '2025-10-01 04:28:17.408555', 'step': 8650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:17.461668', 'step': 8650, 'epoch': 2} {'type': 'loss', 'content': 0.17991697788238525, 'timestamp': '2025-10-01 04:28:17.463822', 'step': 8651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:17.523036', 'step': 8651, 'epoch': 2} {'type': 'loss', 'content': 0.0567145049571991, 'timestamp': '2025-10-01 04:28:17.533282', 'step': 8652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:17.588006', 'step': 8652, 'epoch': 2} {'type': 'loss', 'content': 0.12875665724277496, 'timestamp': '2025-10-01 04:28:17.592489', 'step': 8653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:17.645911', 'step': 8653, 'epoch': 2} {'type': 'loss', 'content': 0.16288426518440247, 'timestamp': '2025-10-01 04:28:17.648291', 'step': 8654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:17.702132', 'step': 8654, 'epoch': 2} {'type': 'loss', 'content': 0.14773143827915192, 'timestamp': '2025-10-01 04:28:17.711056', 'step': 8655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:17.764219', 'step': 8655, 'epoch': 2} {'type': 'loss', 'content': 0.14625951647758484, 'timestamp': '2025-10-01 04:28:17.769943', 'step': 8656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:17.834559', 'step': 8656, 'epoch': 2} {'type': 'loss', 'content': 0.1632651537656784, 'timestamp': '2025-10-01 04:28:17.837282', 'step': 8657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:17.891486', 'step': 8657, 'epoch': 2} {'type': 'loss', 'content': 0.12485288083553314, 'timestamp': '2025-10-01 04:28:17.902614', 'step': 8658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:17.956990', 'step': 8658, 'epoch': 2} {'type': 'loss', 'content': 0.11099528521299362, 'timestamp': '2025-10-01 04:28:17.959048', 'step': 8659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:18.014976', 'step': 8659, 'epoch': 2} {'type': 'loss', 'content': 0.10914760082960129, 'timestamp': '2025-10-01 04:28:18.020891', 'step': 8660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:18.076775', 'step': 8660, 'epoch': 2} {'type': 'loss', 'content': 0.06683113425970078, 'timestamp': '2025-10-01 04:28:18.078783', 'step': 8661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:18.131697', 'step': 8661, 'epoch': 2} {'type': 'loss', 'content': 0.09199576079845428, 'timestamp': '2025-10-01 04:28:18.133887', 'step': 8662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:18.187960', 'step': 8662, 'epoch': 2} {'type': 'loss', 'content': 0.10906408727169037, 'timestamp': '2025-10-01 04:28:18.190719', 'step': 8663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:18.243997', 'step': 8663, 'epoch': 2} {'type': 'loss', 'content': 0.1259298324584961, 'timestamp': '2025-10-01 04:28:18.250095', 'step': 8664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:18.303652', 'step': 8664, 'epoch': 2} {'type': 'loss', 'content': 0.11824890971183777, 'timestamp': '2025-10-01 04:28:18.305773', 'step': 8665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:28:18.358862', 'step': 8665, 'epoch': 2} {'type': 'loss', 'content': 0.1932714283466339, 'timestamp': '2025-10-01 04:28:18.360817', 'step': 8666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:18.415632', 'step': 8666, 'epoch': 2} {'type': 'loss', 'content': 0.10775109380483627, 'timestamp': '2025-10-01 04:28:18.417993', 'step': 8667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:18.472089', 'step': 8667, 'epoch': 2} {'type': 'loss', 'content': 0.09761356562376022, 'timestamp': '2025-10-01 04:28:18.477847', 'step': 8668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:18.530783', 'step': 8668, 'epoch': 2} {'type': 'loss', 'content': 0.08686747401952744, 'timestamp': '2025-10-01 04:28:18.532671', 'step': 8669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:28:18.585874', 'step': 8669, 'epoch': 2} {'type': 'loss', 'content': 0.2152053713798523, 'timestamp': '2025-10-01 04:28:18.587814', 'step': 8670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:18.641637', 'step': 8670, 'epoch': 2} {'type': 'loss', 'content': 0.10452856868505478, 'timestamp': '2025-10-01 04:28:18.644590', 'step': 8671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-01 04:28:18.699709', 'step': 8671, 'epoch': 2} {'type': 'loss', 'content': 0.16693931818008423, 'timestamp': '2025-10-01 04:28:18.706044', 'step': 8672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:18.759221', 'step': 8672, 'epoch': 2} {'type': 'loss', 'content': 0.10963853448629379, 'timestamp': '2025-10-01 04:28:18.761477', 'step': 8673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:18.815358', 'step': 8673, 'epoch': 2} {'type': 'loss', 'content': 0.1015741229057312, 'timestamp': '2025-10-01 04:28:18.817350', 'step': 8674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:18.871637', 'step': 8674, 'epoch': 2} {'type': 'loss', 'content': 0.14856331050395966, 'timestamp': '2025-10-01 04:28:18.873694', 'step': 8675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:18.933659', 'step': 8675, 'epoch': 2} {'type': 'loss', 'content': 0.12085302919149399, 'timestamp': '2025-10-01 04:28:18.939340', 'step': 8676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:18.992000', 'step': 8676, 'epoch': 2} {'type': 'loss', 'content': 0.14105241000652313, 'timestamp': '2025-10-01 04:28:18.993973', 'step': 8677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:19.047789', 'step': 8677, 'epoch': 2} {'type': 'loss', 'content': 0.1485074758529663, 'timestamp': '2025-10-01 04:28:19.049837', 'step': 8678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:19.102589', 'step': 8678, 'epoch': 2} {'type': 'loss', 'content': 0.13009710609912872, 'timestamp': '2025-10-01 04:28:19.104498', 'step': 8679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:28:19.157497', 'step': 8679, 'epoch': 2} {'type': 'loss', 'content': 0.12416844815015793, 'timestamp': '2025-10-01 04:28:19.163220', 'step': 8680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:19.222458', 'step': 8680, 'epoch': 2} {'type': 'loss', 'content': 0.1745045781135559, 'timestamp': '2025-10-01 04:28:19.224795', 'step': 8681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:19.278127', 'step': 8681, 'epoch': 2} {'type': 'loss', 'content': 0.10810908675193787, 'timestamp': '2025-10-01 04:28:19.286555', 'step': 8682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:19.341829', 'step': 8682, 'epoch': 2} {'type': 'loss', 'content': 0.13936935365200043, 'timestamp': '2025-10-01 04:28:19.344381', 'step': 8683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:19.398471', 'step': 8683, 'epoch': 2} {'type': 'loss', 'content': 0.10136855393648148, 'timestamp': '2025-10-01 04:28:19.403952', 'step': 8684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:19.457216', 'step': 8684, 'epoch': 2} {'type': 'loss', 'content': 0.16836632788181305, 'timestamp': '2025-10-01 04:28:19.459307', 'step': 8685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:19.514248', 'step': 8685, 'epoch': 2} {'type': 'loss', 'content': 0.22907386720180511, 'timestamp': '2025-10-01 04:28:19.516374', 'step': 8686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:19.585843', 'step': 8686, 'epoch': 2} {'type': 'loss', 'content': 0.18412145972251892, 'timestamp': '2025-10-01 04:28:19.588042', 'step': 8687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:19.641329', 'step': 8687, 'epoch': 2} {'type': 'loss', 'content': 0.15658073127269745, 'timestamp': '2025-10-01 04:28:19.657888', 'step': 8688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:19.721261', 'step': 8688, 'epoch': 2} {'type': 'loss', 'content': 0.1657363325357437, 'timestamp': '2025-10-01 04:28:19.723417', 'step': 8689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:19.777246', 'step': 8689, 'epoch': 2} {'type': 'loss', 'content': 0.1890152096748352, 'timestamp': '2025-10-01 04:28:19.779246', 'step': 8690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:28:19.832121', 'step': 8690, 'epoch': 2} {'type': 'loss', 'content': 0.21588106453418732, 'timestamp': '2025-10-01 04:28:19.834163', 'step': 8691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:19.887350', 'step': 8691, 'epoch': 2} {'type': 'loss', 'content': 0.15823908150196075, 'timestamp': '2025-10-01 04:28:19.893029', 'step': 8692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:19.945932', 'step': 8692, 'epoch': 2} {'type': 'loss', 'content': 0.1309664398431778, 'timestamp': '2025-10-01 04:28:19.947852', 'step': 8693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:20.000768', 'step': 8693, 'epoch': 2} {'type': 'loss', 'content': 0.13319341838359833, 'timestamp': '2025-10-01 04:28:20.004101', 'step': 8694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:20.071676', 'step': 8694, 'epoch': 2} {'type': 'loss', 'content': 0.09003925323486328, 'timestamp': '2025-10-01 04:28:20.073780', 'step': 8695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:20.127054', 'step': 8695, 'epoch': 2} {'type': 'loss', 'content': 0.08971424400806427, 'timestamp': '2025-10-01 04:28:20.133033', 'step': 8696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:20.185459', 'step': 8696, 'epoch': 2} {'type': 'loss', 'content': 0.22403942048549652, 'timestamp': '2025-10-01 04:28:20.187582', 'step': 8697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-01 04:28:20.241626', 'step': 8697, 'epoch': 2} {'type': 'loss', 'content': 0.3129667043685913, 'timestamp': '2025-10-01 04:28:20.247632', 'step': 8698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:20.302207', 'step': 8698, 'epoch': 2} {'type': 'loss', 'content': 0.10081057995557785, 'timestamp': '2025-10-01 04:28:20.304326', 'step': 8699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:20.359528', 'step': 8699, 'epoch': 2} {'type': 'loss', 'content': 0.20706281065940857, 'timestamp': '2025-10-01 04:28:20.365824', 'step': 8700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:20.432036', 'step': 8700, 'epoch': 2} {'type': 'loss', 'content': 0.20234042406082153, 'timestamp': '2025-10-01 04:28:20.434131', 'step': 8701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:20.488137', 'step': 8701, 'epoch': 2} {'type': 'loss', 'content': 0.12132498621940613, 'timestamp': '2025-10-01 04:28:20.490183', 'step': 8702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:20.543887', 'step': 8702, 'epoch': 2} {'type': 'loss', 'content': 0.12286163121461868, 'timestamp': '2025-10-01 04:28:20.545974', 'step': 8703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:20.602019', 'step': 8703, 'epoch': 2} {'type': 'loss', 'content': 0.10573503375053406, 'timestamp': '2025-10-01 04:28:20.607680', 'step': 8704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:20.660904', 'step': 8704, 'epoch': 2} {'type': 'loss', 'content': 0.16757476329803467, 'timestamp': '2025-10-01 04:28:20.663038', 'step': 8705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:28:20.716729', 'step': 8705, 'epoch': 2} {'type': 'loss', 'content': 0.09540946036577225, 'timestamp': '2025-10-01 04:28:20.718775', 'step': 8706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:20.771882', 'step': 8706, 'epoch': 2} {'type': 'loss', 'content': 0.1722438931465149, 'timestamp': '2025-10-01 04:28:20.773949', 'step': 8707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:20.839927', 'step': 8707, 'epoch': 2} {'type': 'loss', 'content': 0.0831829234957695, 'timestamp': '2025-10-01 04:28:20.851127', 'step': 8708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:20.904705', 'step': 8708, 'epoch': 2} {'type': 'loss', 'content': 0.1655655801296234, 'timestamp': '2025-10-01 04:28:20.906597', 'step': 8709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:20.961234', 'step': 8709, 'epoch': 2} {'type': 'loss', 'content': 0.11238834261894226, 'timestamp': '2025-10-01 04:28:20.968301', 'step': 8710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:21.022643', 'step': 8710, 'epoch': 2} {'type': 'loss', 'content': 0.10712312161922455, 'timestamp': '2025-10-01 04:28:21.030054', 'step': 8711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:21.083067', 'step': 8711, 'epoch': 2} {'type': 'loss', 'content': 0.11973699182271957, 'timestamp': '2025-10-01 04:28:21.089027', 'step': 8712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:21.146376', 'step': 8712, 'epoch': 2} {'type': 'loss', 'content': 0.11643212288618088, 'timestamp': '2025-10-01 04:28:21.148290', 'step': 8713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:21.202103', 'step': 8713, 'epoch': 2} {'type': 'loss', 'content': 0.11377298086881638, 'timestamp': '2025-10-01 04:28:21.204231', 'step': 8714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:21.257560', 'step': 8714, 'epoch': 2} {'type': 'loss', 'content': 0.12954729795455933, 'timestamp': '2025-10-01 04:28:21.259663', 'step': 8715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:21.314476', 'step': 8715, 'epoch': 2} {'type': 'loss', 'content': 0.1394091546535492, 'timestamp': '2025-10-01 04:28:21.320362', 'step': 8716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:21.374108', 'step': 8716, 'epoch': 2} {'type': 'loss', 'content': 0.13315463066101074, 'timestamp': '2025-10-01 04:28:21.376236', 'step': 8717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:21.430294', 'step': 8717, 'epoch': 2} {'type': 'loss', 'content': 0.1835920661687851, 'timestamp': '2025-10-01 04:28:21.432997', 'step': 8718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:21.486957', 'step': 8718, 'epoch': 2} {'type': 'loss', 'content': 0.1668500006198883, 'timestamp': '2025-10-01 04:28:21.489852', 'step': 8719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:21.544291', 'step': 8719, 'epoch': 2} {'type': 'loss', 'content': 0.19220049679279327, 'timestamp': '2025-10-01 04:28:21.550270', 'step': 8720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:28:21.618181', 'step': 8720, 'epoch': 2} {'type': 'loss', 'content': 0.1592218428850174, 'timestamp': '2025-10-01 04:28:21.620291', 'step': 8721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:21.700634', 'step': 8721, 'epoch': 2} {'type': 'loss', 'content': 0.2090039998292923, 'timestamp': '2025-10-01 04:28:21.703920', 'step': 8722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:21.758951', 'step': 8722, 'epoch': 2} {'type': 'loss', 'content': 0.13413532078266144, 'timestamp': '2025-10-01 04:28:21.761502', 'step': 8723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:21.830936', 'step': 8723, 'epoch': 2} {'type': 'loss', 'content': 0.06578098982572556, 'timestamp': '2025-10-01 04:28:21.836966', 'step': 8724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:21.890459', 'step': 8724, 'epoch': 2} {'type': 'loss', 'content': 0.1397004872560501, 'timestamp': '2025-10-01 04:28:21.893334', 'step': 8725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:21.951877', 'step': 8725, 'epoch': 2} {'type': 'loss', 'content': 0.1388102024793625, 'timestamp': '2025-10-01 04:28:21.955122', 'step': 8726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:22.025820', 'step': 8726, 'epoch': 2} {'type': 'loss', 'content': 0.19157353043556213, 'timestamp': '2025-10-01 04:28:22.028075', 'step': 8727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:22.083847', 'step': 8727, 'epoch': 2} {'type': 'loss', 'content': 0.03129231557250023, 'timestamp': '2025-10-01 04:28:22.089723', 'step': 8728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:22.143548', 'step': 8728, 'epoch': 2} {'type': 'loss', 'content': 0.11188830435276031, 'timestamp': '2025-10-01 04:28:22.145678', 'step': 8729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:22.199408', 'step': 8729, 'epoch': 2} {'type': 'loss', 'content': 0.1918376237154007, 'timestamp': '2025-10-01 04:28:22.201569', 'step': 8730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:22.255247', 'step': 8730, 'epoch': 2} {'type': 'loss', 'content': 0.17382149398326874, 'timestamp': '2025-10-01 04:28:22.257952', 'step': 8731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:28:22.312849', 'step': 8731, 'epoch': 2} {'type': 'loss', 'content': 0.09641791135072708, 'timestamp': '2025-10-01 04:28:22.318689', 'step': 8732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:22.371418', 'step': 8732, 'epoch': 2} {'type': 'loss', 'content': 0.16339364647865295, 'timestamp': '2025-10-01 04:28:22.373674', 'step': 8733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:22.427007', 'step': 8733, 'epoch': 2} {'type': 'loss', 'content': 0.10981394350528717, 'timestamp': '2025-10-01 04:28:22.435399', 'step': 8734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:22.489730', 'step': 8734, 'epoch': 2} {'type': 'loss', 'content': 0.06069958955049515, 'timestamp': '2025-10-01 04:28:22.492202', 'step': 8735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:22.547229', 'step': 8735, 'epoch': 2} {'type': 'loss', 'content': 0.14126573503017426, 'timestamp': '2025-10-01 04:28:22.554863', 'step': 8736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:22.619739', 'step': 8736, 'epoch': 2} {'type': 'loss', 'content': 0.19163544476032257, 'timestamp': '2025-10-01 04:28:22.626938', 'step': 8737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:22.681677', 'step': 8737, 'epoch': 2} {'type': 'loss', 'content': 0.14771299064159393, 'timestamp': '2025-10-01 04:28:22.683860', 'step': 8738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:22.741241', 'step': 8738, 'epoch': 2} {'type': 'loss', 'content': 0.10465747863054276, 'timestamp': '2025-10-01 04:28:22.746391', 'step': 8739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:22.807657', 'step': 8739, 'epoch': 2} {'type': 'loss', 'content': 0.16082477569580078, 'timestamp': '2025-10-01 04:28:22.814770', 'step': 8740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:22.868197', 'step': 8740, 'epoch': 2} {'type': 'loss', 'content': 0.1307600438594818, 'timestamp': '2025-10-01 04:28:22.870156', 'step': 8741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:22.924950', 'step': 8741, 'epoch': 2} {'type': 'loss', 'content': 0.1021096259355545, 'timestamp': '2025-10-01 04:28:22.933224', 'step': 8742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:22.987641', 'step': 8742, 'epoch': 2} {'type': 'loss', 'content': 0.13586260378360748, 'timestamp': '2025-10-01 04:28:22.989814', 'step': 8743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:23.043506', 'step': 8743, 'epoch': 2} {'type': 'loss', 'content': 0.13781525194644928, 'timestamp': '2025-10-01 04:28:23.049410', 'step': 8744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:28:23.107817', 'step': 8744, 'epoch': 2} {'type': 'loss', 'content': 0.148336723446846, 'timestamp': '2025-10-01 04:28:23.110033', 'step': 8745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:23.164303', 'step': 8745, 'epoch': 2} {'type': 'loss', 'content': 0.17053067684173584, 'timestamp': '2025-10-01 04:28:23.166579', 'step': 8746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:23.239549', 'step': 8746, 'epoch': 2} {'type': 'loss', 'content': 0.2014904022216797, 'timestamp': '2025-10-01 04:28:23.241942', 'step': 8747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:23.295702', 'step': 8747, 'epoch': 2} {'type': 'loss', 'content': 0.10021506249904633, 'timestamp': '2025-10-01 04:28:23.303797', 'step': 8748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:23.356875', 'step': 8748, 'epoch': 2} {'type': 'loss', 'content': 0.12824426591396332, 'timestamp': '2025-10-01 04:28:23.359214', 'step': 8749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:23.412708', 'step': 8749, 'epoch': 2} {'type': 'loss', 'content': 0.14178234338760376, 'timestamp': '2025-10-01 04:28:23.414982', 'step': 8750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:23.468468', 'step': 8750, 'epoch': 2} {'type': 'loss', 'content': 0.1623750776052475, 'timestamp': '2025-10-01 04:28:23.470873', 'step': 8751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:23.533986', 'step': 8751, 'epoch': 2} {'type': 'loss', 'content': 0.08865810185670853, 'timestamp': '2025-10-01 04:28:23.540598', 'step': 8752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:23.594493', 'step': 8752, 'epoch': 2} {'type': 'loss', 'content': 0.10676329582929611, 'timestamp': '2025-10-01 04:28:23.597390', 'step': 8753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:23.652572', 'step': 8753, 'epoch': 2} {'type': 'loss', 'content': 0.1008317619562149, 'timestamp': '2025-10-01 04:28:23.657383', 'step': 8754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:28:23.712530', 'step': 8754, 'epoch': 2} {'type': 'loss', 'content': 0.16346246004104614, 'timestamp': '2025-10-01 04:28:23.715304', 'step': 8755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:23.771410', 'step': 8755, 'epoch': 2} {'type': 'loss', 'content': 0.20741796493530273, 'timestamp': '2025-10-01 04:28:23.782437', 'step': 8756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:23.835897', 'step': 8756, 'epoch': 2} {'type': 'loss', 'content': 0.08658627420663834, 'timestamp': '2025-10-01 04:28:23.838413', 'step': 8757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:23.892300', 'step': 8757, 'epoch': 2} {'type': 'loss', 'content': 0.12141665071249008, 'timestamp': '2025-10-01 04:28:23.894426', 'step': 8758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:23.947797', 'step': 8758, 'epoch': 2} {'type': 'loss', 'content': 0.12995527684688568, 'timestamp': '2025-10-01 04:28:23.950224', 'step': 8759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:24.003902', 'step': 8759, 'epoch': 2} {'type': 'loss', 'content': 0.10972927510738373, 'timestamp': '2025-10-01 04:28:24.009841', 'step': 8760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:24.063562', 'step': 8760, 'epoch': 2} {'type': 'loss', 'content': 0.23927147686481476, 'timestamp': '2025-10-01 04:28:24.066037', 'step': 8761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:24.127799', 'step': 8761, 'epoch': 2} {'type': 'loss', 'content': 0.1530769318342209, 'timestamp': '2025-10-01 04:28:24.130200', 'step': 8762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:24.183822', 'step': 8762, 'epoch': 2} {'type': 'loss', 'content': 0.20339858531951904, 'timestamp': '2025-10-01 04:28:24.186284', 'step': 8763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:24.246304', 'step': 8763, 'epoch': 2} {'type': 'loss', 'content': 0.12750235199928284, 'timestamp': '2025-10-01 04:28:24.252692', 'step': 8764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:24.306037', 'step': 8764, 'epoch': 2} {'type': 'loss', 'content': 0.13626593351364136, 'timestamp': '2025-10-01 04:28:24.308716', 'step': 8765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:24.366866', 'step': 8765, 'epoch': 2} {'type': 'loss', 'content': 0.24271170794963837, 'timestamp': '2025-10-01 04:28:24.369348', 'step': 8766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:24.441916', 'step': 8766, 'epoch': 2} {'type': 'loss', 'content': 0.12820589542388916, 'timestamp': '2025-10-01 04:28:24.445957', 'step': 8767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:24.502183', 'step': 8767, 'epoch': 2} {'type': 'loss', 'content': 0.16460636258125305, 'timestamp': '2025-10-01 04:28:24.508753', 'step': 8768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:24.564475', 'step': 8768, 'epoch': 2} {'type': 'loss', 'content': 0.0855274572968483, 'timestamp': '2025-10-01 04:28:24.567470', 'step': 8769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:24.623410', 'step': 8769, 'epoch': 2} {'type': 'loss', 'content': 0.08538054674863815, 'timestamp': '2025-10-01 04:28:24.625708', 'step': 8770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:24.681522', 'step': 8770, 'epoch': 2} {'type': 'loss', 'content': 0.12382081151008606, 'timestamp': '2025-10-01 04:28:24.684289', 'step': 8771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:28:24.738875', 'step': 8771, 'epoch': 2} {'type': 'loss', 'content': 0.10206033289432526, 'timestamp': '2025-10-01 04:28:24.745446', 'step': 8772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:24.800252', 'step': 8772, 'epoch': 2} {'type': 'loss', 'content': 0.1358909159898758, 'timestamp': '2025-10-01 04:28:24.802674', 'step': 8773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:28:24.858634', 'step': 8773, 'epoch': 2} {'type': 'loss', 'content': 0.13126525282859802, 'timestamp': '2025-10-01 04:28:24.877018', 'step': 8774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:28:24.936543', 'step': 8774, 'epoch': 2} {'type': 'loss', 'content': 0.10949932038784027, 'timestamp': '2025-10-01 04:28:24.939206', 'step': 8775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:24.993807', 'step': 8775, 'epoch': 2} {'type': 'loss', 'content': 0.12993964552879333, 'timestamp': '2025-10-01 04:28:25.000187', 'step': 8776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:25.054030', 'step': 8776, 'epoch': 2} {'type': 'loss', 'content': 0.1000308245420456, 'timestamp': '2025-10-01 04:28:25.056299', 'step': 8777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:25.109964', 'step': 8777, 'epoch': 2} {'type': 'loss', 'content': 0.0998791828751564, 'timestamp': '2025-10-01 04:28:25.112708', 'step': 8778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:28:25.166847', 'step': 8778, 'epoch': 2} {'type': 'loss', 'content': 0.1589689701795578, 'timestamp': '2025-10-01 04:28:25.169823', 'step': 8779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:25.272782', 'step': 8779, 'epoch': 2} {'type': 'loss', 'content': 0.05241212621331215, 'timestamp': '2025-10-01 04:28:25.280545', 'step': 8780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:25.355915', 'step': 8780, 'epoch': 2} {'type': 'loss', 'content': 0.18449604511260986, 'timestamp': '2025-10-01 04:28:25.368222', 'step': 8781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:28:25.459116', 'step': 8781, 'epoch': 2} {'type': 'loss', 'content': 0.09607499837875366, 'timestamp': '2025-10-01 04:28:25.465682', 'step': 8782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:25.522614', 'step': 8782, 'epoch': 2} {'type': 'loss', 'content': 0.09416402131319046, 'timestamp': '2025-10-01 04:28:25.530312', 'step': 8783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:28:25.596653', 'step': 8783, 'epoch': 2} {'type': 'loss', 'content': 0.11101068556308746, 'timestamp': '2025-10-01 04:28:25.602941', 'step': 8784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:25.691191', 'step': 8784, 'epoch': 2} {'type': 'loss', 'content': 0.22849427163600922, 'timestamp': '2025-10-01 04:28:25.702981', 'step': 8785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:25.758301', 'step': 8785, 'epoch': 2} {'type': 'loss', 'content': 0.07093998044729233, 'timestamp': '2025-10-01 04:28:25.774305', 'step': 8786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:25.842470', 'step': 8786, 'epoch': 2} {'type': 'loss', 'content': 0.14081379771232605, 'timestamp': '2025-10-01 04:28:25.859762', 'step': 8787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:25.928206', 'step': 8787, 'epoch': 2} {'type': 'loss', 'content': 0.11586053669452667, 'timestamp': '2025-10-01 04:28:25.948508', 'step': 8788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:26.007738', 'step': 8788, 'epoch': 2} {'type': 'loss', 'content': 0.13570675253868103, 'timestamp': '2025-10-01 04:28:26.014437', 'step': 8789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:26.092535', 'step': 8789, 'epoch': 2} {'type': 'loss', 'content': 0.0711345449090004, 'timestamp': '2025-10-01 04:28:26.101811', 'step': 8790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:26.178839', 'step': 8790, 'epoch': 2} {'type': 'loss', 'content': 0.09859269112348557, 'timestamp': '2025-10-01 04:28:26.183898', 'step': 8791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:26.269575', 'step': 8791, 'epoch': 2} {'type': 'loss', 'content': 0.2188023328781128, 'timestamp': '2025-10-01 04:28:26.288545', 'step': 8792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:26.345783', 'step': 8792, 'epoch': 2} {'type': 'loss', 'content': 0.13744914531707764, 'timestamp': '2025-10-01 04:28:26.347672', 'step': 8793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:28:26.408116', 'step': 8793, 'epoch': 2} {'type': 'loss', 'content': 0.16800351440906525, 'timestamp': '2025-10-01 04:28:26.413160', 'step': 8794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:26.479190', 'step': 8794, 'epoch': 2} {'type': 'loss', 'content': 0.16886498034000397, 'timestamp': '2025-10-01 04:28:26.482597', 'step': 8795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:26.567748', 'step': 8795, 'epoch': 2} {'type': 'loss', 'content': 0.1332959234714508, 'timestamp': '2025-10-01 04:28:26.574411', 'step': 8796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:28:26.628324', 'step': 8796, 'epoch': 2} {'type': 'loss', 'content': 0.07648672163486481, 'timestamp': '2025-10-01 04:28:26.631072', 'step': 8797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:26.687407', 'step': 8797, 'epoch': 2} {'type': 'loss', 'content': 0.05734652280807495, 'timestamp': '2025-10-01 04:28:26.690439', 'step': 8798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:26.745480', 'step': 8798, 'epoch': 2} {'type': 'loss', 'content': 0.15124928951263428, 'timestamp': '2025-10-01 04:28:26.747730', 'step': 8799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:26.802312', 'step': 8799, 'epoch': 2} {'type': 'loss', 'content': 0.07234982401132584, 'timestamp': '2025-10-01 04:28:26.808479', 'step': 8800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:26.861810', 'step': 8800, 'epoch': 2} {'type': 'loss', 'content': 0.1547643095254898, 'timestamp': '2025-10-01 04:28:26.864241', 'step': 8801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:26.931651', 'step': 8801, 'epoch': 2} {'type': 'loss', 'content': 0.12652845680713654, 'timestamp': '2025-10-01 04:28:26.934058', 'step': 8802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:26.988307', 'step': 8802, 'epoch': 2} {'type': 'loss', 'content': 0.07532402127981186, 'timestamp': '2025-10-01 04:28:26.990484', 'step': 8803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:27.045196', 'step': 8803, 'epoch': 2} {'type': 'loss', 'content': 0.0701705738902092, 'timestamp': '2025-10-01 04:28:27.051408', 'step': 8804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:27.132743', 'step': 8804, 'epoch': 2} {'type': 'loss', 'content': 0.19643805921077728, 'timestamp': '2025-10-01 04:28:27.135083', 'step': 8805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:27.190082', 'step': 8805, 'epoch': 2} {'type': 'loss', 'content': 0.13551227748394012, 'timestamp': '2025-10-01 04:28:27.192271', 'step': 8806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:27.246389', 'step': 8806, 'epoch': 2} {'type': 'loss', 'content': 0.09099990129470825, 'timestamp': '2025-10-01 04:28:27.255119', 'step': 8807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:27.309667', 'step': 8807, 'epoch': 2} {'type': 'loss', 'content': 0.19963198900222778, 'timestamp': '2025-10-01 04:28:27.317531', 'step': 8808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:27.370806', 'step': 8808, 'epoch': 2} {'type': 'loss', 'content': 0.18516775965690613, 'timestamp': '2025-10-01 04:28:27.372832', 'step': 8809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:28:27.440632', 'step': 8809, 'epoch': 2} {'type': 'loss', 'content': 0.11051537096500397, 'timestamp': '2025-10-01 04:28:27.442942', 'step': 8810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:27.499012', 'step': 8810, 'epoch': 2} {'type': 'loss', 'content': 0.12194213271141052, 'timestamp': '2025-10-01 04:28:27.501182', 'step': 8811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:27.561811', 'step': 8811, 'epoch': 2} {'type': 'loss', 'content': 0.0783812552690506, 'timestamp': '2025-10-01 04:28:27.583057', 'step': 8812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:27.649232', 'step': 8812, 'epoch': 2} {'type': 'loss', 'content': 0.15681076049804688, 'timestamp': '2025-10-01 04:28:27.651799', 'step': 8813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:27.713525', 'step': 8813, 'epoch': 2} {'type': 'loss', 'content': 0.11805704981088638, 'timestamp': '2025-10-01 04:28:27.716011', 'step': 8814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:27.781288', 'step': 8814, 'epoch': 2} {'type': 'loss', 'content': 0.20893928408622742, 'timestamp': '2025-10-01 04:28:27.783892', 'step': 8815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:27.840927', 'step': 8815, 'epoch': 2} {'type': 'loss', 'content': 0.09330476820468903, 'timestamp': '2025-10-01 04:28:27.848659', 'step': 8816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:28:27.903647', 'step': 8816, 'epoch': 2} {'type': 'loss', 'content': 0.16370442509651184, 'timestamp': '2025-10-01 04:28:27.905907', 'step': 8817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:27.970103', 'step': 8817, 'epoch': 2} {'type': 'loss', 'content': 0.1917630285024643, 'timestamp': '2025-10-01 04:28:27.972273', 'step': 8818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:28:28.027361', 'step': 8818, 'epoch': 2} {'type': 'loss', 'content': 0.20075209438800812, 'timestamp': '2025-10-01 04:28:28.030813', 'step': 8819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:28.087425', 'step': 8819, 'epoch': 2} {'type': 'loss', 'content': 0.19255541265010834, 'timestamp': '2025-10-01 04:28:28.093751', 'step': 8820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:28.147420', 'step': 8820, 'epoch': 2} {'type': 'loss', 'content': 0.13133840262889862, 'timestamp': '2025-10-01 04:28:28.149968', 'step': 8821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:28.214353', 'step': 8821, 'epoch': 2} {'type': 'loss', 'content': 0.07268363237380981, 'timestamp': '2025-10-01 04:28:28.216985', 'step': 8822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:28:28.277794', 'step': 8822, 'epoch': 2} {'type': 'loss', 'content': 0.10148973017930984, 'timestamp': '2025-10-01 04:28:28.295080', 'step': 8823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:28.352696', 'step': 8823, 'epoch': 2} {'type': 'loss', 'content': 0.16442425549030304, 'timestamp': '2025-10-01 04:28:28.360053', 'step': 8824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:28.432050', 'step': 8824, 'epoch': 2} {'type': 'loss', 'content': 0.09449116140604019, 'timestamp': '2025-10-01 04:28:28.433941', 'step': 8825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:28:28.494200', 'step': 8825, 'epoch': 2} {'type': 'loss', 'content': 0.10810235142707825, 'timestamp': '2025-10-01 04:28:28.496377', 'step': 8826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:28.551939', 'step': 8826, 'epoch': 2} {'type': 'loss', 'content': 0.1458904892206192, 'timestamp': '2025-10-01 04:28:28.554249', 'step': 8827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:28.609451', 'step': 8827, 'epoch': 2} {'type': 'loss', 'content': 0.06474588066339493, 'timestamp': '2025-10-01 04:28:28.615609', 'step': 8828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:28.669650', 'step': 8828, 'epoch': 2} {'type': 'loss', 'content': 0.11295083910226822, 'timestamp': '2025-10-01 04:28:28.671797', 'step': 8829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:28.735499', 'step': 8829, 'epoch': 2} {'type': 'loss', 'content': 0.15688015520572662, 'timestamp': '2025-10-01 04:28:28.738997', 'step': 8830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:28.793024', 'step': 8830, 'epoch': 2} {'type': 'loss', 'content': 0.18250253796577454, 'timestamp': '2025-10-01 04:28:28.795692', 'step': 8831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:28.848816', 'step': 8831, 'epoch': 2} {'type': 'loss', 'content': 0.09617108851671219, 'timestamp': '2025-10-01 04:28:28.855048', 'step': 8832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:28.908058', 'step': 8832, 'epoch': 2} {'type': 'loss', 'content': 0.04083234444260597, 'timestamp': '2025-10-01 04:28:28.910156', 'step': 8833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:28.963420', 'step': 8833, 'epoch': 2} {'type': 'loss', 'content': 0.20277023315429688, 'timestamp': '2025-10-01 04:28:28.966364', 'step': 8834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:28:29.020896', 'step': 8834, 'epoch': 2} {'type': 'loss', 'content': 0.10986092686653137, 'timestamp': '2025-10-01 04:28:29.023163', 'step': 8835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:29.077321', 'step': 8835, 'epoch': 2} {'type': 'loss', 'content': 0.2247500717639923, 'timestamp': '2025-10-01 04:28:29.083539', 'step': 8836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:28:29.140294', 'step': 8836, 'epoch': 2} {'type': 'loss', 'content': 0.12430575489997864, 'timestamp': '2025-10-01 04:28:29.142664', 'step': 8837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:29.207507', 'step': 8837, 'epoch': 2} {'type': 'loss', 'content': 0.215843066573143, 'timestamp': '2025-10-01 04:28:29.210627', 'step': 8838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:29.264604', 'step': 8838, 'epoch': 2} {'type': 'loss', 'content': 0.12421484291553497, 'timestamp': '2025-10-01 04:28:29.267651', 'step': 8839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:29.332233', 'step': 8839, 'epoch': 2} {'type': 'loss', 'content': 0.10912586003541946, 'timestamp': '2025-10-01 04:28:29.338387', 'step': 8840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:28:29.391304', 'step': 8840, 'epoch': 2} {'type': 'loss', 'content': 0.10287344455718994, 'timestamp': '2025-10-01 04:28:29.393444', 'step': 8841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:29.448410', 'step': 8841, 'epoch': 2} {'type': 'loss', 'content': 0.08720244467258453, 'timestamp': '2025-10-01 04:28:29.451444', 'step': 8842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:29.506289', 'step': 8842, 'epoch': 2} {'type': 'loss', 'content': 0.1234326884150505, 'timestamp': '2025-10-01 04:28:29.520006', 'step': 8843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:29.574438', 'step': 8843, 'epoch': 2} {'type': 'loss', 'content': 0.08676886558532715, 'timestamp': '2025-10-01 04:28:29.580587', 'step': 8844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:29.635379', 'step': 8844, 'epoch': 2} {'type': 'loss', 'content': 0.130983367562294, 'timestamp': '2025-10-01 04:28:29.638005', 'step': 8845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:28:29.695833', 'step': 8845, 'epoch': 2} {'type': 'loss', 'content': 0.14104072749614716, 'timestamp': '2025-10-01 04:28:29.698215', 'step': 8846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:29.753340', 'step': 8846, 'epoch': 2} {'type': 'loss', 'content': 0.09559044241905212, 'timestamp': '2025-10-01 04:28:29.755410', 'step': 8847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:29.809486', 'step': 8847, 'epoch': 2} {'type': 'loss', 'content': 0.09315522015094757, 'timestamp': '2025-10-01 04:28:29.815630', 'step': 8848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:29.870369', 'step': 8848, 'epoch': 2} {'type': 'loss', 'content': 0.17622244358062744, 'timestamp': '2025-10-01 04:28:29.873387', 'step': 8849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:29.929066', 'step': 8849, 'epoch': 2} {'type': 'loss', 'content': 0.11989279091358185, 'timestamp': '2025-10-01 04:28:29.931912', 'step': 8850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:28:30.005636', 'step': 8850, 'epoch': 2} {'type': 'loss', 'content': 0.17165948450565338, 'timestamp': '2025-10-01 04:28:30.008366', 'step': 8851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:28:30.068017', 'step': 8851, 'epoch': 2} {'type': 'loss', 'content': 0.049084585160017014, 'timestamp': '2025-10-01 04:28:30.074718', 'step': 8852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:30.132107', 'step': 8852, 'epoch': 2} {'type': 'loss', 'content': 0.1509905755519867, 'timestamp': '2025-10-01 04:28:30.134563', 'step': 8853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:30.192064', 'step': 8853, 'epoch': 2} {'type': 'loss', 'content': 0.13768607378005981, 'timestamp': '2025-10-01 04:28:30.194320', 'step': 8854, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-01 04:28:44.348537', 'step': 8854, 'epoch': 2} {'type': 'pplx', 'content': 13001.901598612883, 'timestamp': '2025-10-01 04:28:44.352731', 'step': 8854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:44.411710', 'step': 8854, 'epoch': 2} {'type': 'loss', 'content': 0.10887464880943298, 'timestamp': '2025-10-01 04:28:44.419764', 'step': 8855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:44.476258', 'step': 8855, 'epoch': 2} {'type': 'loss', 'content': 0.20601192116737366, 'timestamp': '2025-10-01 04:28:44.482604', 'step': 8856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:44.550853', 'step': 8856, 'epoch': 2} {'type': 'loss', 'content': 0.14776815474033356, 'timestamp': '2025-10-01 04:28:44.555216', 'step': 8857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:44.624138', 'step': 8857, 'epoch': 2} {'type': 'loss', 'content': 0.09804297238588333, 'timestamp': '2025-10-01 04:28:44.628860', 'step': 8858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:44.688851', 'step': 8858, 'epoch': 2} {'type': 'loss', 'content': 0.15217836201190948, 'timestamp': '2025-10-01 04:28:44.704146', 'step': 8859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:44.770213', 'step': 8859, 'epoch': 2} {'type': 'loss', 'content': 0.1117921844124794, 'timestamp': '2025-10-01 04:28:44.776776', 'step': 8860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:28:44.833570', 'step': 8860, 'epoch': 2} {'type': 'loss', 'content': 0.16759058833122253, 'timestamp': '2025-10-01 04:28:44.835900', 'step': 8861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:44.890217', 'step': 8861, 'epoch': 2} {'type': 'loss', 'content': 0.13545668125152588, 'timestamp': '2025-10-01 04:28:44.895902', 'step': 8862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:28:44.954828', 'step': 8862, 'epoch': 2} {'type': 'loss', 'content': 0.08128891885280609, 'timestamp': '2025-10-01 04:28:44.957146', 'step': 8863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:45.011261', 'step': 8863, 'epoch': 2} {'type': 'loss', 'content': 0.13664552569389343, 'timestamp': '2025-10-01 04:28:45.018512', 'step': 8864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:45.076347', 'step': 8864, 'epoch': 2} {'type': 'loss', 'content': 0.1312737762928009, 'timestamp': '2025-10-01 04:28:45.078682', 'step': 8865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:28:45.143776', 'step': 8865, 'epoch': 2} {'type': 'loss', 'content': 0.10131251811981201, 'timestamp': '2025-10-01 04:28:45.146044', 'step': 8866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:28:45.206273', 'step': 8866, 'epoch': 2} {'type': 'loss', 'content': 0.13430935144424438, 'timestamp': '2025-10-01 04:28:45.213061', 'step': 8867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:45.271035', 'step': 8867, 'epoch': 2} {'type': 'loss', 'content': 0.19437120854854584, 'timestamp': '2025-10-01 04:28:45.277719', 'step': 8868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:45.336829', 'step': 8868, 'epoch': 2} {'type': 'loss', 'content': 0.07798195630311966, 'timestamp': '2025-10-01 04:28:45.341809', 'step': 8869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:45.402836', 'step': 8869, 'epoch': 2} {'type': 'loss', 'content': 0.10616093128919601, 'timestamp': '2025-10-01 04:28:45.404901', 'step': 8870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:45.464078', 'step': 8870, 'epoch': 2} {'type': 'loss', 'content': 0.12488870322704315, 'timestamp': '2025-10-01 04:28:45.466172', 'step': 8871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:45.530261', 'step': 8871, 'epoch': 2} {'type': 'loss', 'content': 0.08717289566993713, 'timestamp': '2025-10-01 04:28:45.536810', 'step': 8872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:45.592132', 'step': 8872, 'epoch': 2} {'type': 'loss', 'content': 0.1829601675271988, 'timestamp': '2025-10-01 04:28:45.598795', 'step': 8873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:28:45.655149', 'step': 8873, 'epoch': 2} {'type': 'loss', 'content': 0.13528622686862946, 'timestamp': '2025-10-01 04:28:45.657755', 'step': 8874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:45.716309', 'step': 8874, 'epoch': 2} {'type': 'loss', 'content': 0.11933156847953796, 'timestamp': '2025-10-01 04:28:45.718932', 'step': 8875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:45.776904', 'step': 8875, 'epoch': 2} {'type': 'loss', 'content': 0.08546661585569382, 'timestamp': '2025-10-01 04:28:45.783529', 'step': 8876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:45.839875', 'step': 8876, 'epoch': 2} {'type': 'loss', 'content': 0.22746096551418304, 'timestamp': '2025-10-01 04:28:45.841963', 'step': 8877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:28:45.897011', 'step': 8877, 'epoch': 2} {'type': 'loss', 'content': 0.1499832570552826, 'timestamp': '2025-10-01 04:28:45.899788', 'step': 8878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:45.961536', 'step': 8878, 'epoch': 2} {'type': 'loss', 'content': 0.1575605571269989, 'timestamp': '2025-10-01 04:28:45.963786', 'step': 8879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:28:46.017978', 'step': 8879, 'epoch': 2} {'type': 'loss', 'content': 0.21285535395145416, 'timestamp': '2025-10-01 04:28:46.024331', 'step': 8880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:46.078270', 'step': 8880, 'epoch': 2} {'type': 'loss', 'content': 0.13704679906368256, 'timestamp': '2025-10-01 04:28:46.080467', 'step': 8881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:28:46.142916', 'step': 8881, 'epoch': 2} {'type': 'loss', 'content': 0.1718585044145584, 'timestamp': '2025-10-01 04:28:46.145536', 'step': 8882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:46.199433', 'step': 8882, 'epoch': 2} {'type': 'loss', 'content': 0.109320729970932, 'timestamp': '2025-10-01 04:28:46.206685', 'step': 8883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:46.260004', 'step': 8883, 'epoch': 2} {'type': 'loss', 'content': 0.17704863846302032, 'timestamp': '2025-10-01 04:28:46.266122', 'step': 8884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:46.319326', 'step': 8884, 'epoch': 2} {'type': 'loss', 'content': 0.1177566722035408, 'timestamp': '2025-10-01 04:28:46.321739', 'step': 8885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:46.376199', 'step': 8885, 'epoch': 2} {'type': 'loss', 'content': 0.22432947158813477, 'timestamp': '2025-10-01 04:28:46.379242', 'step': 8886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:28:46.433231', 'step': 8886, 'epoch': 2} {'type': 'loss', 'content': 0.11146103590726852, 'timestamp': '2025-10-01 04:28:46.435427', 'step': 8887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:46.492182', 'step': 8887, 'epoch': 2} {'type': 'loss', 'content': 0.10841881483793259, 'timestamp': '2025-10-01 04:28:46.498247', 'step': 8888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:46.551694', 'step': 8888, 'epoch': 2} {'type': 'loss', 'content': 0.08019337058067322, 'timestamp': '2025-10-01 04:28:46.554001', 'step': 8889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:46.608506', 'step': 8889, 'epoch': 2} {'type': 'loss', 'content': 0.15130014717578888, 'timestamp': '2025-10-01 04:28:46.611918', 'step': 8890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:46.667492', 'step': 8890, 'epoch': 2} {'type': 'loss', 'content': 0.1364990919828415, 'timestamp': '2025-10-01 04:28:46.669756', 'step': 8891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:28:46.723967', 'step': 8891, 'epoch': 2} {'type': 'loss', 'content': 0.18837134540081024, 'timestamp': '2025-10-01 04:28:46.732100', 'step': 8892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:46.786357', 'step': 8892, 'epoch': 2} {'type': 'loss', 'content': 0.08517319709062576, 'timestamp': '2025-10-01 04:28:46.788777', 'step': 8893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:28:46.842713', 'step': 8893, 'epoch': 2} {'type': 'loss', 'content': 0.17292629182338715, 'timestamp': '2025-10-01 04:28:46.845076', 'step': 8894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:46.898738', 'step': 8894, 'epoch': 2} {'type': 'loss', 'content': 0.09330997616052628, 'timestamp': '2025-10-01 04:28:46.901011', 'step': 8895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:46.955564', 'step': 8895, 'epoch': 2} {'type': 'loss', 'content': 0.0869140625, 'timestamp': '2025-10-01 04:28:46.961861', 'step': 8896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:47.015005', 'step': 8896, 'epoch': 2} {'type': 'loss', 'content': 0.14405551552772522, 'timestamp': '2025-10-01 04:28:47.017061', 'step': 8897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:47.071390', 'step': 8897, 'epoch': 2} {'type': 'loss', 'content': 0.11888883262872696, 'timestamp': '2025-10-01 04:28:47.073778', 'step': 8898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:28:47.141394', 'step': 8898, 'epoch': 2} {'type': 'loss', 'content': 0.12624329328536987, 'timestamp': '2025-10-01 04:28:47.144079', 'step': 8899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:47.198307', 'step': 8899, 'epoch': 2} {'type': 'loss', 'content': 0.1361265331506729, 'timestamp': '2025-10-01 04:28:47.215265', 'step': 8900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:47.268553', 'step': 8900, 'epoch': 2} {'type': 'loss', 'content': 0.08620822429656982, 'timestamp': '2025-10-01 04:28:47.270786', 'step': 8901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:47.342121', 'step': 8901, 'epoch': 2} {'type': 'loss', 'content': 0.17699125409126282, 'timestamp': '2025-10-01 04:28:47.344350', 'step': 8902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:47.397890', 'step': 8902, 'epoch': 2} {'type': 'loss', 'content': 0.12208785861730576, 'timestamp': '2025-10-01 04:28:47.400074', 'step': 8903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:47.453688', 'step': 8903, 'epoch': 2} {'type': 'loss', 'content': 0.20595134794712067, 'timestamp': '2025-10-01 04:28:47.460386', 'step': 8904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:47.513742', 'step': 8904, 'epoch': 2} {'type': 'loss', 'content': 0.29546859860420227, 'timestamp': '2025-10-01 04:28:47.515973', 'step': 8905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:47.569005', 'step': 8905, 'epoch': 2} {'type': 'loss', 'content': 0.09396291524171829, 'timestamp': '2025-10-01 04:28:47.571970', 'step': 8906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:47.625490', 'step': 8906, 'epoch': 2} {'type': 'loss', 'content': 0.12677358090877533, 'timestamp': '2025-10-01 04:28:47.627812', 'step': 8907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:47.680984', 'step': 8907, 'epoch': 2} {'type': 'loss', 'content': 0.19168615341186523, 'timestamp': '2025-10-01 04:28:47.687108', 'step': 8908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:47.740162', 'step': 8908, 'epoch': 2} {'type': 'loss', 'content': 0.1034763753414154, 'timestamp': '2025-10-01 04:28:47.743354', 'step': 8909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:47.796383', 'step': 8909, 'epoch': 2} {'type': 'loss', 'content': 0.14670614898204803, 'timestamp': '2025-10-01 04:28:47.799026', 'step': 8910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:47.867531', 'step': 8910, 'epoch': 2} {'type': 'loss', 'content': 0.20333637297153473, 'timestamp': '2025-10-01 04:28:47.869702', 'step': 8911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:47.925242', 'step': 8911, 'epoch': 2} {'type': 'loss', 'content': 0.10827647894620895, 'timestamp': '2025-10-01 04:28:47.931485', 'step': 8912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:48.002715', 'step': 8912, 'epoch': 2} {'type': 'loss', 'content': 0.10592418909072876, 'timestamp': '2025-10-01 04:28:48.004950', 'step': 8913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:48.059547', 'step': 8913, 'epoch': 2} {'type': 'loss', 'content': 0.065004363656044, 'timestamp': '2025-10-01 04:28:48.062421', 'step': 8914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:48.117442', 'step': 8914, 'epoch': 2} {'type': 'loss', 'content': 0.24522916972637177, 'timestamp': '2025-10-01 04:28:48.119481', 'step': 8915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:48.173242', 'step': 8915, 'epoch': 2} {'type': 'loss', 'content': 0.07069414108991623, 'timestamp': '2025-10-01 04:28:48.179218', 'step': 8916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:48.232395', 'step': 8916, 'epoch': 2} {'type': 'loss', 'content': 0.06839001178741455, 'timestamp': '2025-10-01 04:28:48.234848', 'step': 8917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:48.288767', 'step': 8917, 'epoch': 2} {'type': 'loss', 'content': 0.08591227978467941, 'timestamp': '2025-10-01 04:28:48.291499', 'step': 8918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:48.344529', 'step': 8918, 'epoch': 2} {'type': 'loss', 'content': 0.1305711716413498, 'timestamp': '2025-10-01 04:28:48.347963', 'step': 8919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:48.401769', 'step': 8919, 'epoch': 2} {'type': 'loss', 'content': 0.12380959093570709, 'timestamp': '2025-10-01 04:28:48.407807', 'step': 8920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:48.460744', 'step': 8920, 'epoch': 2} {'type': 'loss', 'content': 0.11895505338907242, 'timestamp': '2025-10-01 04:28:48.463005', 'step': 8921, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:48.518879', 'step': 8921, 'epoch': 2} {'type': 'loss', 'content': 0.14987996220588684, 'timestamp': '2025-10-01 04:28:48.522178', 'step': 8922, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:48.576404', 'step': 8922, 'epoch': 2} {'type': 'loss', 'content': 0.24714447557926178, 'timestamp': '2025-10-01 04:28:48.578701', 'step': 8923, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:48.632865', 'step': 8923, 'epoch': 2} {'type': 'loss', 'content': 0.1644206941127777, 'timestamp': '2025-10-01 04:28:48.639136', 'step': 8924, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:48.693641', 'step': 8924, 'epoch': 2} {'type': 'loss', 'content': 0.1524883359670639, 'timestamp': '2025-10-01 04:28:48.695883', 'step': 8925, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:48.751594', 'step': 8925, 'epoch': 2} {'type': 'loss', 'content': 0.11470590531826019, 'timestamp': '2025-10-01 04:28:48.753839', 'step': 8926, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:48.814520', 'step': 8926, 'epoch': 2} {'type': 'loss', 'content': 0.07590483874082565, 'timestamp': '2025-10-01 04:28:48.816534', 'step': 8927, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:48.870724', 'step': 8927, 'epoch': 2} {'type': 'loss', 'content': 0.25571852922439575, 'timestamp': '2025-10-01 04:28:48.876855', 'step': 8928, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:48.931041', 'step': 8928, 'epoch': 2} {'type': 'loss', 'content': 0.21814656257629395, 'timestamp': '2025-10-01 04:28:48.933230', 'step': 8929, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:48.986556', 'step': 8929, 'epoch': 2} {'type': 'loss', 'content': 0.18346735835075378, 'timestamp': '2025-10-01 04:28:48.998731', 'step': 8930, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:49.052811', 'step': 8930, 'epoch': 2} {'type': 'loss', 'content': 0.10317811369895935, 'timestamp': '2025-10-01 04:28:49.054840', 'step': 8931, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:49.108862', 'step': 8931, 'epoch': 2} {'type': 'loss', 'content': 0.08347499370574951, 'timestamp': '2025-10-01 04:28:49.115762', 'step': 8932, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:49.179871', 'step': 8932, 'epoch': 2} {'type': 'loss', 'content': 0.08718550950288773, 'timestamp': '2025-10-01 04:28:49.182306', 'step': 8933, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:49.236830', 'step': 8933, 'epoch': 2} {'type': 'loss', 'content': 0.14495214819908142, 'timestamp': '2025-10-01 04:28:49.239248', 'step': 8934, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:49.294009', 'step': 8934, 'epoch': 2} {'type': 'loss', 'content': 0.09831295907497406, 'timestamp': '2025-10-01 04:28:49.303887', 'step': 8935, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:49.357098', 'step': 8935, 'epoch': 2} {'type': 'loss', 'content': 0.07298146933317184, 'timestamp': '2025-10-01 04:28:49.364056', 'step': 8936, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:49.422012', 'step': 8936, 'epoch': 2} {'type': 'loss', 'content': 0.18770833313465118, 'timestamp': '2025-10-01 04:28:49.424097', 'step': 8937, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:49.478503', 'step': 8937, 'epoch': 2} {'type': 'loss', 'content': 0.04834720864892006, 'timestamp': '2025-10-01 04:28:49.481652', 'step': 8938, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:49.535821', 'step': 8938, 'epoch': 2} {'type': 'loss', 'content': 0.19162514805793762, 'timestamp': '2025-10-01 04:28:49.537984', 'step': 8939, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:49.592365', 'step': 8939, 'epoch': 2} {'type': 'loss', 'content': 0.14639703929424286, 'timestamp': '2025-10-01 04:28:49.599553', 'step': 8940, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:49.659002', 'step': 8940, 'epoch': 2} {'type': 'loss', 'content': 0.08342863619327545, 'timestamp': '2025-10-01 04:28:49.661399', 'step': 8941, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:49.715241', 'step': 8941, 'epoch': 2} {'type': 'loss', 'content': 0.10749301314353943, 'timestamp': '2025-10-01 04:28:49.718859', 'step': 8942, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:49.772452', 'step': 8942, 'epoch': 2} {'type': 'loss', 'content': 0.10093227028846741, 'timestamp': '2025-10-01 04:28:49.774837', 'step': 8943, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:49.829254', 'step': 8943, 'epoch': 2} {'type': 'loss', 'content': 0.08588900417089462, 'timestamp': '2025-10-01 04:28:49.836310', 'step': 8944, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:49.891374', 'step': 8944, 'epoch': 2} {'type': 'loss', 'content': 0.15377452969551086, 'timestamp': '2025-10-01 04:28:49.893496', 'step': 8945, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:49.946904', 'step': 8945, 'epoch': 2} {'type': 'loss', 'content': 0.15195922553539276, 'timestamp': '2025-10-01 04:28:49.954779', 'step': 8946, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:50.007824', 'step': 8946, 'epoch': 2} {'type': 'loss', 'content': 0.10083714127540588, 'timestamp': '2025-10-01 04:28:50.010082', 'step': 8947, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:50.064732', 'step': 8947, 'epoch': 2} {'type': 'loss', 'content': 0.15626344084739685, 'timestamp': '2025-10-01 04:28:50.071393', 'step': 8948, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:28:50.123987', 'step': 8948, 'epoch': 2} {'type': 'loss', 'content': 0.1660810261964798, 'timestamp': '2025-10-01 04:28:50.126038', 'step': 8949, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:50.179818', 'step': 8949, 'epoch': 2} {'type': 'loss', 'content': 0.16782693564891815, 'timestamp': '2025-10-01 04:28:50.184123', 'step': 8950, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:50.237590', 'step': 8950, 'epoch': 2} {'type': 'loss', 'content': 0.1179029643535614, 'timestamp': '2025-10-01 04:28:50.239911', 'step': 8951, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:50.293309', 'step': 8951, 'epoch': 2} {'type': 'loss', 'content': 0.09741739928722382, 'timestamp': '2025-10-01 04:28:50.299835', 'step': 8952, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:50.353428', 'step': 8952, 'epoch': 2} {'type': 'loss', 'content': 0.17241719365119934, 'timestamp': '2025-10-01 04:28:50.355454', 'step': 8953, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:50.409401', 'step': 8953, 'epoch': 2} {'type': 'loss', 'content': 0.11183037608861923, 'timestamp': '2025-10-01 04:28:50.412172', 'step': 8954, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:50.466115', 'step': 8954, 'epoch': 2} {'type': 'loss', 'content': 0.1376943588256836, 'timestamp': '2025-10-01 04:28:50.468222', 'step': 8955, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:50.523108', 'step': 8955, 'epoch': 2} {'type': 'loss', 'content': 0.2637735605239868, 'timestamp': '2025-10-01 04:28:50.529473', 'step': 8956, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:50.583664', 'step': 8956, 'epoch': 2} {'type': 'loss', 'content': 0.15797285735607147, 'timestamp': '2025-10-01 04:28:50.586099', 'step': 8957, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:28:50.640901', 'step': 8957, 'epoch': 2} {'type': 'loss', 'content': 0.10768777877092361, 'timestamp': '2025-10-01 04:28:50.644084', 'step': 8958, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:28:50.699014', 'step': 8958, 'epoch': 2} {'type': 'loss', 'content': 0.09452342242002487, 'timestamp': '2025-10-01 04:28:50.701213', 'step': 8959, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:50.756399', 'step': 8959, 'epoch': 2} {'type': 'loss', 'content': 0.13246966898441315, 'timestamp': '2025-10-01 04:28:50.762531', 'step': 8960, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:50.816961', 'step': 8960, 'epoch': 2} {'type': 'loss', 'content': 0.1934967190027237, 'timestamp': '2025-10-01 04:28:50.819388', 'step': 8961, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:50.874031', 'step': 8961, 'epoch': 2} {'type': 'loss', 'content': 0.17565855383872986, 'timestamp': '2025-10-01 04:28:50.876867', 'step': 8962, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:50.931341', 'step': 8962, 'epoch': 2} {'type': 'loss', 'content': 0.13828209042549133, 'timestamp': '2025-10-01 04:28:50.933438', 'step': 8963, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:50.988194', 'step': 8963, 'epoch': 2} {'type': 'loss', 'content': 0.1034010797739029, 'timestamp': '2025-10-01 04:28:50.994007', 'step': 8964, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:51.048384', 'step': 8964, 'epoch': 2} {'type': 'loss', 'content': 0.14090920984745026, 'timestamp': '2025-10-01 04:28:51.051159', 'step': 8965, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:51.106884', 'step': 8965, 'epoch': 2} {'type': 'loss', 'content': 0.13042530417442322, 'timestamp': '2025-10-01 04:28:51.109396', 'step': 8966, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:51.165604', 'step': 8966, 'epoch': 2} {'type': 'loss', 'content': 0.10343124717473984, 'timestamp': '2025-10-01 04:28:51.167593', 'step': 8967, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:51.223672', 'step': 8967, 'epoch': 2} {'type': 'loss', 'content': 0.15059788525104523, 'timestamp': '2025-10-01 04:28:51.230374', 'step': 8968, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:51.284785', 'step': 8968, 'epoch': 2} {'type': 'loss', 'content': 0.13233716785907745, 'timestamp': '2025-10-01 04:28:51.287056', 'step': 8969, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:51.341108', 'step': 8969, 'epoch': 2} {'type': 'loss', 'content': 0.12595514953136444, 'timestamp': '2025-10-01 04:28:51.343882', 'step': 8970, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:28:51.399657', 'step': 8970, 'epoch': 2} {'type': 'loss', 'content': 0.07440071552991867, 'timestamp': '2025-10-01 04:28:51.401776', 'step': 8971, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:51.465160', 'step': 8971, 'epoch': 2} {'type': 'loss', 'content': 0.12815122306346893, 'timestamp': '2025-10-01 04:28:51.471297', 'step': 8972, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:51.525109', 'step': 8972, 'epoch': 2} {'type': 'loss', 'content': 0.1298360675573349, 'timestamp': '2025-10-01 04:28:51.527941', 'step': 8973, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:51.581831', 'step': 8973, 'epoch': 2} {'type': 'loss', 'content': 0.26818352937698364, 'timestamp': '2025-10-01 04:28:51.584238', 'step': 8974, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:28:51.638451', 'step': 8974, 'epoch': 2} {'type': 'loss', 'content': 0.1467876434326172, 'timestamp': '2025-10-01 04:28:51.641123', 'step': 8975, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:51.695561', 'step': 8975, 'epoch': 2} {'type': 'loss', 'content': 0.18486826121807098, 'timestamp': '2025-10-01 04:28:51.701503', 'step': 8976, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:51.754826', 'step': 8976, 'epoch': 2} {'type': 'loss', 'content': 0.09203799813985825, 'timestamp': '2025-10-01 04:28:51.757420', 'step': 8977, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:51.817983', 'step': 8977, 'epoch': 2} {'type': 'loss', 'content': 0.15900519490242004, 'timestamp': '2025-10-01 04:28:51.820196', 'step': 8978, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:28:51.877155', 'step': 8978, 'epoch': 2} {'type': 'loss', 'content': 0.25894638895988464, 'timestamp': '2025-10-01 04:28:51.879383', 'step': 8979, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:51.934016', 'step': 8979, 'epoch': 2} {'type': 'loss', 'content': 0.10793225467205048, 'timestamp': '2025-10-01 04:28:51.940291', 'step': 8980, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:51.993875', 'step': 8980, 'epoch': 2} {'type': 'loss', 'content': 0.026107825338840485, 'timestamp': '2025-10-01 04:28:51.998341', 'step': 8981, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:52.052084', 'step': 8981, 'epoch': 2} {'type': 'loss', 'content': 0.14449815452098846, 'timestamp': '2025-10-01 04:28:52.054806', 'step': 8982, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:28:52.109271', 'step': 8982, 'epoch': 2} {'type': 'loss', 'content': 0.13816885650157928, 'timestamp': '2025-10-01 04:28:52.111874', 'step': 8983, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:28:52.166703', 'step': 8983, 'epoch': 2} {'type': 'loss', 'content': 0.12787684798240662, 'timestamp': '2025-10-01 04:28:52.172897', 'step': 8984, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:52.226719', 'step': 8984, 'epoch': 2} {'type': 'loss', 'content': 0.15726785361766815, 'timestamp': '2025-10-01 04:28:52.229147', 'step': 8985, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:52.282997', 'step': 8985, 'epoch': 2} {'type': 'loss', 'content': 0.18178009986877441, 'timestamp': '2025-10-01 04:28:52.285560', 'step': 8986, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:52.340316', 'step': 8986, 'epoch': 2} {'type': 'loss', 'content': 0.19207148253917694, 'timestamp': '2025-10-01 04:28:52.342666', 'step': 8987, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:28:52.396943', 'step': 8987, 'epoch': 2} {'type': 'loss', 'content': 0.12289487570524216, 'timestamp': '2025-10-01 04:28:52.402865', 'step': 8988, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:52.455536', 'step': 8988, 'epoch': 2} {'type': 'loss', 'content': 0.11205665022134781, 'timestamp': '2025-10-01 04:28:52.460006', 'step': 8989, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:52.515119', 'step': 8989, 'epoch': 2} {'type': 'loss', 'content': 0.08965978026390076, 'timestamp': '2025-10-01 04:28:52.517347', 'step': 8990, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:52.570950', 'step': 8990, 'epoch': 2} {'type': 'loss', 'content': 0.09676820039749146, 'timestamp': '2025-10-01 04:28:52.574082', 'step': 8991, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:52.628382', 'step': 8991, 'epoch': 2} {'type': 'loss', 'content': 0.11222728341817856, 'timestamp': '2025-10-01 04:28:52.634196', 'step': 8992, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:52.687120', 'step': 8992, 'epoch': 2} {'type': 'loss', 'content': 0.1316366344690323, 'timestamp': '2025-10-01 04:28:52.689409', 'step': 8993, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:52.744196', 'step': 8993, 'epoch': 2} {'type': 'loss', 'content': 0.09219882637262344, 'timestamp': '2025-10-01 04:28:52.746558', 'step': 8994, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:52.801442', 'step': 8994, 'epoch': 2} {'type': 'loss', 'content': 0.14128907024860382, 'timestamp': '2025-10-01 04:28:52.803711', 'step': 8995, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:28:52.860811', 'step': 8995, 'epoch': 2} {'type': 'loss', 'content': 0.06663775444030762, 'timestamp': '2025-10-01 04:28:52.867477', 'step': 8996, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:52.931276', 'step': 8996, 'epoch': 2} {'type': 'loss', 'content': 0.09031359851360321, 'timestamp': '2025-10-01 04:28:52.933585', 'step': 8997, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:52.990596', 'step': 8997, 'epoch': 2} {'type': 'loss', 'content': 0.2512529492378235, 'timestamp': '2025-10-01 04:28:52.999604', 'step': 8998, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:53.054329', 'step': 8998, 'epoch': 2} {'type': 'loss', 'content': 0.13872835040092468, 'timestamp': '2025-10-01 04:28:53.056440', 'step': 8999, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:53.110738', 'step': 8999, 'epoch': 2} {'type': 'loss', 'content': 0.1204451397061348, 'timestamp': '2025-10-01 04:28:53.117129', 'step': 9000, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 9000', 'timestamp': '2025-10-01 04:28:53.481286', 'step': 9000, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:53.546161', 'step': 9000, 'epoch': 2} {'type': 'loss', 'content': 0.1669626086950302, 'timestamp': '2025-10-01 04:28:53.548358', 'step': 9001, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:53.602438', 'step': 9001, 'epoch': 2} {'type': 'loss', 'content': 0.12021371722221375, 'timestamp': '2025-10-01 04:28:53.604707', 'step': 9002, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:53.661015', 'step': 9002, 'epoch': 2} {'type': 'loss', 'content': 0.07735924422740936, 'timestamp': '2025-10-01 04:28:53.663272', 'step': 9003, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:53.717016', 'step': 9003, 'epoch': 2} {'type': 'loss', 'content': 0.09632875770330429, 'timestamp': '2025-10-01 04:28:53.723185', 'step': 9004, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:53.777159', 'step': 9004, 'epoch': 2} {'type': 'loss', 'content': 0.2379760444164276, 'timestamp': '2025-10-01 04:28:53.779480', 'step': 9005, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:53.832809', 'step': 9005, 'epoch': 2} {'type': 'loss', 'content': 0.05662167817354202, 'timestamp': '2025-10-01 04:28:53.835030', 'step': 9006, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:53.888936', 'step': 9006, 'epoch': 2} {'type': 'loss', 'content': 0.06984689831733704, 'timestamp': '2025-10-01 04:28:53.894167', 'step': 9007, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:53.947383', 'step': 9007, 'epoch': 2} {'type': 'loss', 'content': 0.13560135662555695, 'timestamp': '2025-10-01 04:28:53.953551', 'step': 9008, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:54.007283', 'step': 9008, 'epoch': 2} {'type': 'loss', 'content': 0.14839106798171997, 'timestamp': '2025-10-01 04:28:54.009769', 'step': 9009, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:54.063184', 'step': 9009, 'epoch': 2} {'type': 'loss', 'content': 0.09956791251897812, 'timestamp': '2025-10-01 04:28:54.066410', 'step': 9010, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:28:54.121444', 'step': 9010, 'epoch': 2} {'type': 'loss', 'content': 0.09887494891881943, 'timestamp': '2025-10-01 04:28:54.123618', 'step': 9011, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:54.177590', 'step': 9011, 'epoch': 2} {'type': 'loss', 'content': 0.05630147084593773, 'timestamp': '2025-10-01 04:28:54.183955', 'step': 9012, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:54.238186', 'step': 9012, 'epoch': 2} {'type': 'loss', 'content': 0.15866661071777344, 'timestamp': '2025-10-01 04:28:54.245904', 'step': 9013, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:54.302333', 'step': 9013, 'epoch': 2} {'type': 'loss', 'content': 0.14190168678760529, 'timestamp': '2025-10-01 04:28:54.309897', 'step': 9014, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:54.363654', 'step': 9014, 'epoch': 2} {'type': 'loss', 'content': 0.2058684378862381, 'timestamp': '2025-10-01 04:28:54.365905', 'step': 9015, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:54.419179', 'step': 9015, 'epoch': 2} {'type': 'loss', 'content': 0.1594102382659912, 'timestamp': '2025-10-01 04:28:54.425423', 'step': 9016, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:54.488819', 'step': 9016, 'epoch': 2} {'type': 'loss', 'content': 0.1262836903333664, 'timestamp': '2025-10-01 04:28:54.490975', 'step': 9017, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:54.545845', 'step': 9017, 'epoch': 2} {'type': 'loss', 'content': 0.059883806854486465, 'timestamp': '2025-10-01 04:28:54.548484', 'step': 9018, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:28:54.602280', 'step': 9018, 'epoch': 2} {'type': 'loss', 'content': 0.12788066267967224, 'timestamp': '2025-10-01 04:28:54.609913', 'step': 9019, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:54.664825', 'step': 9019, 'epoch': 2} {'type': 'loss', 'content': 0.18285706639289856, 'timestamp': '2025-10-01 04:28:54.671191', 'step': 9020, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:54.724548', 'step': 9020, 'epoch': 2} {'type': 'loss', 'content': 0.11131302267313004, 'timestamp': '2025-10-01 04:28:54.726827', 'step': 9021, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:54.780776', 'step': 9021, 'epoch': 2} {'type': 'loss', 'content': 0.09851110726594925, 'timestamp': '2025-10-01 04:28:54.782991', 'step': 9022, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:28:54.836903', 'step': 9022, 'epoch': 2} {'type': 'loss', 'content': 0.12183832377195358, 'timestamp': '2025-10-01 04:28:54.839213', 'step': 9023, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:54.892805', 'step': 9023, 'epoch': 2} {'type': 'loss', 'content': 0.10346786677837372, 'timestamp': '2025-10-01 04:28:54.899038', 'step': 9024, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:54.951732', 'step': 9024, 'epoch': 2} {'type': 'loss', 'content': 0.11029335111379623, 'timestamp': '2025-10-01 04:28:54.953932', 'step': 9025, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:55.008394', 'step': 9025, 'epoch': 2} {'type': 'loss', 'content': 0.16486205160617828, 'timestamp': '2025-10-01 04:28:55.010803', 'step': 9026, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:55.065074', 'step': 9026, 'epoch': 2} {'type': 'loss', 'content': 0.07367486506700516, 'timestamp': '2025-10-01 04:28:55.067147', 'step': 9027, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:55.122419', 'step': 9027, 'epoch': 2} {'type': 'loss', 'content': 0.13351118564605713, 'timestamp': '2025-10-01 04:28:55.128994', 'step': 9028, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:55.186150', 'step': 9028, 'epoch': 2} {'type': 'loss', 'content': 0.1404077112674713, 'timestamp': '2025-10-01 04:28:55.188129', 'step': 9029, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:55.241800', 'step': 9029, 'epoch': 2} {'type': 'loss', 'content': 0.14719726145267487, 'timestamp': '2025-10-01 04:28:55.244172', 'step': 9030, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:55.297753', 'step': 9030, 'epoch': 2} {'type': 'loss', 'content': 0.10308931022882462, 'timestamp': '2025-10-01 04:28:55.300128', 'step': 9031, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:55.354027', 'step': 9031, 'epoch': 2} {'type': 'loss', 'content': 0.20397081971168518, 'timestamp': '2025-10-01 04:28:55.360410', 'step': 9032, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:28:55.413589', 'step': 9032, 'epoch': 2} {'type': 'loss', 'content': 0.12672150135040283, 'timestamp': '2025-10-01 04:28:55.416144', 'step': 9033, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:28:55.470653', 'step': 9033, 'epoch': 2} {'type': 'loss', 'content': 0.08477228879928589, 'timestamp': '2025-10-01 04:28:55.472838', 'step': 9034, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:55.526846', 'step': 9034, 'epoch': 2} {'type': 'loss', 'content': 0.1658673882484436, 'timestamp': '2025-10-01 04:28:55.529102', 'step': 9035, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:55.585032', 'step': 9035, 'epoch': 2} {'type': 'loss', 'content': 0.17982739210128784, 'timestamp': '2025-10-01 04:28:55.591235', 'step': 9036, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:55.644686', 'step': 9036, 'epoch': 2} {'type': 'loss', 'content': 0.1550799459218979, 'timestamp': '2025-10-01 04:28:55.646963', 'step': 9037, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:55.699909', 'step': 9037, 'epoch': 2} {'type': 'loss', 'content': 0.14396904408931732, 'timestamp': '2025-10-01 04:28:55.702353', 'step': 9038, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:55.755724', 'step': 9038, 'epoch': 2} {'type': 'loss', 'content': 0.1417168378829956, 'timestamp': '2025-10-01 04:28:55.757960', 'step': 9039, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:55.813154', 'step': 9039, 'epoch': 2} {'type': 'loss', 'content': 0.14355343580245972, 'timestamp': '2025-10-01 04:28:55.820952', 'step': 9040, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:28:55.875135', 'step': 9040, 'epoch': 2} {'type': 'loss', 'content': 0.11994649469852448, 'timestamp': '2025-10-01 04:28:55.877307', 'step': 9041, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:55.931728', 'step': 9041, 'epoch': 2} {'type': 'loss', 'content': 0.17466790974140167, 'timestamp': '2025-10-01 04:28:55.933937', 'step': 9042, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:55.989202', 'step': 9042, 'epoch': 2} {'type': 'loss', 'content': 0.17533352971076965, 'timestamp': '2025-10-01 04:28:55.991416', 'step': 9043, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:56.047084', 'step': 9043, 'epoch': 2} {'type': 'loss', 'content': 0.11172164231538773, 'timestamp': '2025-10-01 04:28:56.053575', 'step': 9044, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:56.108179', 'step': 9044, 'epoch': 2} {'type': 'loss', 'content': 0.14654934406280518, 'timestamp': '2025-10-01 04:28:56.110285', 'step': 9045, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:56.166276', 'step': 9045, 'epoch': 2} {'type': 'loss', 'content': 0.10897858440876007, 'timestamp': '2025-10-01 04:28:56.168888', 'step': 9046, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:56.223817', 'step': 9046, 'epoch': 2} {'type': 'loss', 'content': 0.18996797502040863, 'timestamp': '2025-10-01 04:28:56.226107', 'step': 9047, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:56.280458', 'step': 9047, 'epoch': 2} {'type': 'loss', 'content': 0.09895167499780655, 'timestamp': '2025-10-01 04:28:56.286882', 'step': 9048, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:56.341361', 'step': 9048, 'epoch': 2} {'type': 'loss', 'content': 0.09690799564123154, 'timestamp': '2025-10-01 04:28:56.344059', 'step': 9049, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:56.399695', 'step': 9049, 'epoch': 2} {'type': 'loss', 'content': 0.1641748547554016, 'timestamp': '2025-10-01 04:28:56.402572', 'step': 9050, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:56.458305', 'step': 9050, 'epoch': 2} {'type': 'loss', 'content': 0.12383630126714706, 'timestamp': '2025-10-01 04:28:56.461180', 'step': 9051, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:56.516389', 'step': 9051, 'epoch': 2} {'type': 'loss', 'content': 0.1777406930923462, 'timestamp': '2025-10-01 04:28:56.523021', 'step': 9052, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:56.578886', 'step': 9052, 'epoch': 2} {'type': 'loss', 'content': 0.12058267742395401, 'timestamp': '2025-10-01 04:28:56.580954', 'step': 9053, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:56.641789', 'step': 9053, 'epoch': 2} {'type': 'loss', 'content': 0.21791309118270874, 'timestamp': '2025-10-01 04:28:56.643954', 'step': 9054, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:56.705879', 'step': 9054, 'epoch': 2} {'type': 'loss', 'content': 0.10238585621118546, 'timestamp': '2025-10-01 04:28:56.708159', 'step': 9055, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:56.773750', 'step': 9055, 'epoch': 2} {'type': 'loss', 'content': 0.18828971683979034, 'timestamp': '2025-10-01 04:28:56.781263', 'step': 9056, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:56.835276', 'step': 9056, 'epoch': 2} {'type': 'loss', 'content': 0.19015851616859436, 'timestamp': '2025-10-01 04:28:56.837641', 'step': 9057, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:56.891677', 'step': 9057, 'epoch': 2} {'type': 'loss', 'content': 0.11038529872894287, 'timestamp': '2025-10-01 04:28:56.894008', 'step': 9058, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:56.950476', 'step': 9058, 'epoch': 2} {'type': 'loss', 'content': 0.15427370369434357, 'timestamp': '2025-10-01 04:28:56.953053', 'step': 9059, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:57.006291', 'step': 9059, 'epoch': 2} {'type': 'loss', 'content': 0.13391102850437164, 'timestamp': '2025-10-01 04:28:57.012091', 'step': 9060, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:57.064676', 'step': 9060, 'epoch': 2} {'type': 'loss', 'content': 0.11829668283462524, 'timestamp': '2025-10-01 04:28:57.079229', 'step': 9061, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:57.132239', 'step': 9061, 'epoch': 2} {'type': 'loss', 'content': 0.16396564245224, 'timestamp': '2025-10-01 04:28:57.134667', 'step': 9062, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:28:57.188352', 'step': 9062, 'epoch': 2} {'type': 'loss', 'content': 0.10370451211929321, 'timestamp': '2025-10-01 04:28:57.190571', 'step': 9063, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:57.243915', 'step': 9063, 'epoch': 2} {'type': 'loss', 'content': 0.16116362810134888, 'timestamp': '2025-10-01 04:28:57.249579', 'step': 9064, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:57.301940', 'step': 9064, 'epoch': 2} {'type': 'loss', 'content': 0.08613938838243484, 'timestamp': '2025-10-01 04:28:57.303954', 'step': 9065, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:57.357025', 'step': 9065, 'epoch': 2} {'type': 'loss', 'content': 0.13581222295761108, 'timestamp': '2025-10-01 04:28:57.359333', 'step': 9066, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:28:57.412540', 'step': 9066, 'epoch': 2} {'type': 'loss', 'content': 0.08626800775527954, 'timestamp': '2025-10-01 04:28:57.414795', 'step': 9067, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:57.467766', 'step': 9067, 'epoch': 2} {'type': 'loss', 'content': 0.17329053580760956, 'timestamp': '2025-10-01 04:28:57.473816', 'step': 9068, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:57.528212', 'step': 9068, 'epoch': 2} {'type': 'loss', 'content': 0.1468333750963211, 'timestamp': '2025-10-01 04:28:57.530398', 'step': 9069, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:57.583573', 'step': 9069, 'epoch': 2} {'type': 'loss', 'content': 0.16199439764022827, 'timestamp': '2025-10-01 04:28:57.585894', 'step': 9070, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:57.641682', 'step': 9070, 'epoch': 2} {'type': 'loss', 'content': 0.17595548927783966, 'timestamp': '2025-10-01 04:28:57.643870', 'step': 9071, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:57.696272', 'step': 9071, 'epoch': 2} {'type': 'loss', 'content': 0.11327821016311646, 'timestamp': '2025-10-01 04:28:57.702055', 'step': 9072, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:57.757795', 'step': 9072, 'epoch': 2} {'type': 'loss', 'content': 0.09979686141014099, 'timestamp': '2025-10-01 04:28:57.761351', 'step': 9073, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:57.815769', 'step': 9073, 'epoch': 2} {'type': 'loss', 'content': 0.10518035292625427, 'timestamp': '2025-10-01 04:28:57.818465', 'step': 9074, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:57.872133', 'step': 9074, 'epoch': 2} {'type': 'loss', 'content': 0.3036917448043823, 'timestamp': '2025-10-01 04:28:57.874293', 'step': 9075, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:57.927282', 'step': 9075, 'epoch': 2} {'type': 'loss', 'content': 0.11644944548606873, 'timestamp': '2025-10-01 04:28:57.933588', 'step': 9076, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:57.986176', 'step': 9076, 'epoch': 2} {'type': 'loss', 'content': 0.14657771587371826, 'timestamp': '2025-10-01 04:28:57.988240', 'step': 9077, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:58.042663', 'step': 9077, 'epoch': 2} {'type': 'loss', 'content': 0.15794824063777924, 'timestamp': '2025-10-01 04:28:58.045178', 'step': 9078, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:58.103128', 'step': 9078, 'epoch': 2} {'type': 'loss', 'content': 0.11386535316705704, 'timestamp': '2025-10-01 04:28:58.106521', 'step': 9079, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:58.159200', 'step': 9079, 'epoch': 2} {'type': 'loss', 'content': 0.11132575571537018, 'timestamp': '2025-10-01 04:28:58.164953', 'step': 9080, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:58.219073', 'step': 9080, 'epoch': 2} {'type': 'loss', 'content': 0.1675073802471161, 'timestamp': '2025-10-01 04:28:58.221907', 'step': 9081, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:58.275704', 'step': 9081, 'epoch': 2} {'type': 'loss', 'content': 0.14232774078845978, 'timestamp': '2025-10-01 04:28:58.278733', 'step': 9082, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:28:58.332180', 'step': 9082, 'epoch': 2} {'type': 'loss', 'content': 0.1455281525850296, 'timestamp': '2025-10-01 04:28:58.335192', 'step': 9083, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:58.388696', 'step': 9083, 'epoch': 2} {'type': 'loss', 'content': 0.22370950877666473, 'timestamp': '2025-10-01 04:28:58.395116', 'step': 9084, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:58.448134', 'step': 9084, 'epoch': 2} {'type': 'loss', 'content': 0.1055179312825203, 'timestamp': '2025-10-01 04:28:58.451907', 'step': 9085, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:58.515548', 'step': 9085, 'epoch': 2} {'type': 'loss', 'content': 0.10566964745521545, 'timestamp': '2025-10-01 04:28:58.528036', 'step': 9086, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:28:58.583024', 'step': 9086, 'epoch': 2} {'type': 'loss', 'content': 0.11896512657403946, 'timestamp': '2025-10-01 04:28:58.585478', 'step': 9087, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:58.639118', 'step': 9087, 'epoch': 2} {'type': 'loss', 'content': 0.10240936279296875, 'timestamp': '2025-10-01 04:28:58.644800', 'step': 9088, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:58.702425', 'step': 9088, 'epoch': 2} {'type': 'loss', 'content': 0.09959205240011215, 'timestamp': '2025-10-01 04:28:58.704937', 'step': 9089, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:28:58.758172', 'step': 9089, 'epoch': 2} {'type': 'loss', 'content': 0.1419089138507843, 'timestamp': '2025-10-01 04:28:58.760485', 'step': 9090, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:58.815226', 'step': 9090, 'epoch': 2} {'type': 'loss', 'content': 0.08283597975969315, 'timestamp': '2025-10-01 04:28:58.817423', 'step': 9091, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:28:58.872132', 'step': 9091, 'epoch': 2} {'type': 'loss', 'content': 0.12985728681087494, 'timestamp': '2025-10-01 04:28:58.878389', 'step': 9092, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:28:58.933596', 'step': 9092, 'epoch': 2} {'type': 'loss', 'content': 0.13021284341812134, 'timestamp': '2025-10-01 04:28:58.940546', 'step': 9093, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:59.000584', 'step': 9093, 'epoch': 2} {'type': 'loss', 'content': 0.1545104682445526, 'timestamp': '2025-10-01 04:28:59.003238', 'step': 9094, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:59.064668', 'step': 9094, 'epoch': 2} {'type': 'loss', 'content': 0.11401546746492386, 'timestamp': '2025-10-01 04:28:59.067045', 'step': 9095, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:59.121403', 'step': 9095, 'epoch': 2} {'type': 'loss', 'content': 0.12580113112926483, 'timestamp': '2025-10-01 04:28:59.127760', 'step': 9096, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:59.181598', 'step': 9096, 'epoch': 2} {'type': 'loss', 'content': 0.19754785299301147, 'timestamp': '2025-10-01 04:28:59.184432', 'step': 9097, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:59.240173', 'step': 9097, 'epoch': 2} {'type': 'loss', 'content': 0.25025689601898193, 'timestamp': '2025-10-01 04:28:59.244076', 'step': 9098, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:59.298300', 'step': 9098, 'epoch': 2} {'type': 'loss', 'content': 0.193906769156456, 'timestamp': '2025-10-01 04:28:59.300463', 'step': 9099, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:59.354705', 'step': 9099, 'epoch': 2} {'type': 'loss', 'content': 0.18100547790527344, 'timestamp': '2025-10-01 04:28:59.360912', 'step': 9100, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:28:59.414484', 'step': 9100, 'epoch': 2} {'type': 'loss', 'content': 0.18226343393325806, 'timestamp': '2025-10-01 04:28:59.417231', 'step': 9101, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:59.470695', 'step': 9101, 'epoch': 2} {'type': 'loss', 'content': 0.10723311454057693, 'timestamp': '2025-10-01 04:28:59.473307', 'step': 9102, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:59.527688', 'step': 9102, 'epoch': 2} {'type': 'loss', 'content': 0.15775175392627716, 'timestamp': '2025-10-01 04:28:59.530348', 'step': 9103, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:59.583959', 'step': 9103, 'epoch': 2} {'type': 'loss', 'content': 0.21091458201408386, 'timestamp': '2025-10-01 04:28:59.589857', 'step': 9104, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:28:59.643478', 'step': 9104, 'epoch': 2} {'type': 'loss', 'content': 0.09463874250650406, 'timestamp': '2025-10-01 04:28:59.646076', 'step': 9105, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:59.698753', 'step': 9105, 'epoch': 2} {'type': 'loss', 'content': 0.0682811439037323, 'timestamp': '2025-10-01 04:28:59.700708', 'step': 9106, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:59.761075', 'step': 9106, 'epoch': 2} {'type': 'loss', 'content': 0.08383898437023163, 'timestamp': '2025-10-01 04:28:59.763208', 'step': 9107, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:28:59.816371', 'step': 9107, 'epoch': 2} {'type': 'loss', 'content': 0.10187386721372604, 'timestamp': '2025-10-01 04:28:59.822308', 'step': 9108, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:59.875481', 'step': 9108, 'epoch': 2} {'type': 'loss', 'content': 0.11940538883209229, 'timestamp': '2025-10-01 04:28:59.878044', 'step': 9109, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:59.931420', 'step': 9109, 'epoch': 2} {'type': 'loss', 'content': 0.14810818433761597, 'timestamp': '2025-10-01 04:28:59.934704', 'step': 9110, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:28:59.988525', 'step': 9110, 'epoch': 2} {'type': 'loss', 'content': 0.13003678619861603, 'timestamp': '2025-10-01 04:28:59.990933', 'step': 9111, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:00.051569', 'step': 9111, 'epoch': 2} {'type': 'loss', 'content': 0.06720440089702606, 'timestamp': '2025-10-01 04:29:00.057839', 'step': 9112, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:00.115107', 'step': 9112, 'epoch': 2} {'type': 'loss', 'content': 0.1684623658657074, 'timestamp': '2025-10-01 04:29:00.117882', 'step': 9113, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:00.185555', 'step': 9113, 'epoch': 2} {'type': 'loss', 'content': 0.0912332758307457, 'timestamp': '2025-10-01 04:29:00.187653', 'step': 9114, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:00.252570', 'step': 9114, 'epoch': 2} {'type': 'loss', 'content': 0.1913035809993744, 'timestamp': '2025-10-01 04:29:00.254845', 'step': 9115, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:00.312941', 'step': 9115, 'epoch': 2} {'type': 'loss', 'content': 0.17530690133571625, 'timestamp': '2025-10-01 04:29:00.319691', 'step': 9116, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:00.382733', 'step': 9116, 'epoch': 2} {'type': 'loss', 'content': 0.0729808583855629, 'timestamp': '2025-10-01 04:29:00.386614', 'step': 9117, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:00.445132', 'step': 9117, 'epoch': 2} {'type': 'loss', 'content': 0.09410767257213593, 'timestamp': '2025-10-01 04:29:00.447304', 'step': 9118, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:00.502187', 'step': 9118, 'epoch': 2} {'type': 'loss', 'content': 0.12013798207044601, 'timestamp': '2025-10-01 04:29:00.504279', 'step': 9119, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:00.557686', 'step': 9119, 'epoch': 2} {'type': 'loss', 'content': 0.15569011867046356, 'timestamp': '2025-10-01 04:29:00.565397', 'step': 9120, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:29:00.619583', 'step': 9120, 'epoch': 2} {'type': 'loss', 'content': 0.07911647111177444, 'timestamp': '2025-10-01 04:29:00.621626', 'step': 9121, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:00.677232', 'step': 9121, 'epoch': 2} {'type': 'loss', 'content': 0.14428108930587769, 'timestamp': '2025-10-01 04:29:00.679459', 'step': 9122, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:00.737108', 'step': 9122, 'epoch': 2} {'type': 'loss', 'content': 0.09628847986459732, 'timestamp': '2025-10-01 04:29:00.739752', 'step': 9123, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:29:00.798391', 'step': 9123, 'epoch': 2} {'type': 'loss', 'content': 0.12693555653095245, 'timestamp': '2025-10-01 04:29:00.805405', 'step': 9124, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:00.863688', 'step': 9124, 'epoch': 2} {'type': 'loss', 'content': 0.0935618057847023, 'timestamp': '2025-10-01 04:29:00.865976', 'step': 9125, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:00.929050', 'step': 9125, 'epoch': 2} {'type': 'loss', 'content': 0.09757937490940094, 'timestamp': '2025-10-01 04:29:00.932401', 'step': 9126, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:00.986949', 'step': 9126, 'epoch': 2} {'type': 'loss', 'content': 0.15277203917503357, 'timestamp': '2025-10-01 04:29:00.989149', 'step': 9127, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:01.042358', 'step': 9127, 'epoch': 2} {'type': 'loss', 'content': 0.14836496114730835, 'timestamp': '2025-10-01 04:29:01.048526', 'step': 9128, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:01.101378', 'step': 9128, 'epoch': 2} {'type': 'loss', 'content': 0.06489741802215576, 'timestamp': '2025-10-01 04:29:01.103412', 'step': 9129, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:01.158835', 'step': 9129, 'epoch': 2} {'type': 'loss', 'content': 0.11159171164035797, 'timestamp': '2025-10-01 04:29:01.161499', 'step': 9130, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:01.225701', 'step': 9130, 'epoch': 2} {'type': 'loss', 'content': 0.1342388540506363, 'timestamp': '2025-10-01 04:29:01.228047', 'step': 9131, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:01.281662', 'step': 9131, 'epoch': 2} {'type': 'loss', 'content': 0.14917579293251038, 'timestamp': '2025-10-01 04:29:01.287693', 'step': 9132, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:29:01.346442', 'step': 9132, 'epoch': 2} {'type': 'loss', 'content': 0.09079943597316742, 'timestamp': '2025-10-01 04:29:01.348986', 'step': 9133, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:01.403086', 'step': 9133, 'epoch': 2} {'type': 'loss', 'content': 0.10330723226070404, 'timestamp': '2025-10-01 04:29:01.405164', 'step': 9134, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:01.459387', 'step': 9134, 'epoch': 2} {'type': 'loss', 'content': 0.19197988510131836, 'timestamp': '2025-10-01 04:29:01.461478', 'step': 9135, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:01.516443', 'step': 9135, 'epoch': 2} {'type': 'loss', 'content': 0.10697393119335175, 'timestamp': '2025-10-01 04:29:01.522171', 'step': 9136, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:01.582103', 'step': 9136, 'epoch': 2} {'type': 'loss', 'content': 0.09923557937145233, 'timestamp': '2025-10-01 04:29:01.584782', 'step': 9137, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:01.637804', 'step': 9137, 'epoch': 2} {'type': 'loss', 'content': 0.08351361751556396, 'timestamp': '2025-10-01 04:29:01.642610', 'step': 9138, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-01 04:29:01.706803', 'step': 9138, 'epoch': 2} {'type': 'loss', 'content': 0.11212428659200668, 'timestamp': '2025-10-01 04:29:01.709051', 'step': 9139, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:01.762029', 'step': 9139, 'epoch': 2} {'type': 'loss', 'content': 0.16121262311935425, 'timestamp': '2025-10-01 04:29:01.773396', 'step': 9140, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:01.829330', 'step': 9140, 'epoch': 2} {'type': 'loss', 'content': 0.0970616489648819, 'timestamp': '2025-10-01 04:29:01.832032', 'step': 9141, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:01.891077', 'step': 9141, 'epoch': 2} {'type': 'loss', 'content': 0.0939977616071701, 'timestamp': '2025-10-01 04:29:01.893235', 'step': 9142, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:29:01.946816', 'step': 9142, 'epoch': 2} {'type': 'loss', 'content': 0.09645982086658478, 'timestamp': '2025-10-01 04:29:01.949564', 'step': 9143, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:02.013187', 'step': 9143, 'epoch': 2} {'type': 'loss', 'content': 0.15733417868614197, 'timestamp': '2025-10-01 04:29:02.018969', 'step': 9144, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:02.071756', 'step': 9144, 'epoch': 2} {'type': 'loss', 'content': 0.23787853121757507, 'timestamp': '2025-10-01 04:29:02.076237', 'step': 9145, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:02.165734', 'step': 9145, 'epoch': 2} {'type': 'loss', 'content': 0.10056866705417633, 'timestamp': '2025-10-01 04:29:02.167968', 'step': 9146, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:02.221645', 'step': 9146, 'epoch': 2} {'type': 'loss', 'content': 0.23329859972000122, 'timestamp': '2025-10-01 04:29:02.232198', 'step': 9147, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:02.297444', 'step': 9147, 'epoch': 2} {'type': 'loss', 'content': 0.056621525436639786, 'timestamp': '2025-10-01 04:29:02.303301', 'step': 9148, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:02.357049', 'step': 9148, 'epoch': 2} {'type': 'loss', 'content': 0.10416962951421738, 'timestamp': '2025-10-01 04:29:02.366259', 'step': 9149, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:02.419591', 'step': 9149, 'epoch': 2} {'type': 'loss', 'content': 0.18983745574951172, 'timestamp': '2025-10-01 04:29:02.425872', 'step': 9150, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:29:02.483364', 'step': 9150, 'epoch': 2} {'type': 'loss', 'content': 0.25357961654663086, 'timestamp': '2025-10-01 04:29:02.485660', 'step': 9151, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:02.539264', 'step': 9151, 'epoch': 2} {'type': 'loss', 'content': 0.0973355695605278, 'timestamp': '2025-10-01 04:29:02.545446', 'step': 9152, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:02.598386', 'step': 9152, 'epoch': 2} {'type': 'loss', 'content': 0.1942954808473587, 'timestamp': '2025-10-01 04:29:02.600517', 'step': 9153, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:02.653589', 'step': 9153, 'epoch': 2} {'type': 'loss', 'content': 0.1172066405415535, 'timestamp': '2025-10-01 04:29:02.655794', 'step': 9154, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:29:02.717320', 'step': 9154, 'epoch': 2} {'type': 'loss', 'content': 0.13232997059822083, 'timestamp': '2025-10-01 04:29:02.719629', 'step': 9155, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:02.773122', 'step': 9155, 'epoch': 2} {'type': 'loss', 'content': 0.08137287199497223, 'timestamp': '2025-10-01 04:29:02.778860', 'step': 9156, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:02.831719', 'step': 9156, 'epoch': 2} {'type': 'loss', 'content': 0.13348102569580078, 'timestamp': '2025-10-01 04:29:02.834267', 'step': 9157, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:02.892682', 'step': 9157, 'epoch': 2} {'type': 'loss', 'content': 0.12107931077480316, 'timestamp': '2025-10-01 04:29:02.894909', 'step': 9158, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:02.948271', 'step': 9158, 'epoch': 2} {'type': 'loss', 'content': 0.07217030972242355, 'timestamp': '2025-10-01 04:29:02.957643', 'step': 9159, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:03.010609', 'step': 9159, 'epoch': 2} {'type': 'loss', 'content': 0.1903122365474701, 'timestamp': '2025-10-01 04:29:03.016043', 'step': 9160, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:29:03.069753', 'step': 9160, 'epoch': 2} {'type': 'loss', 'content': 0.07186271995306015, 'timestamp': '2025-10-01 04:29:03.071872', 'step': 9161, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:03.125763', 'step': 9161, 'epoch': 2} {'type': 'loss', 'content': 0.11631660163402557, 'timestamp': '2025-10-01 04:29:03.127998', 'step': 9162, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:03.181717', 'step': 9162, 'epoch': 2} {'type': 'loss', 'content': 0.09701450914144516, 'timestamp': '2025-10-01 04:29:03.183839', 'step': 9163, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:03.237913', 'step': 9163, 'epoch': 2} {'type': 'loss', 'content': 0.11338522285223007, 'timestamp': '2025-10-01 04:29:03.243485', 'step': 9164, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:03.296408', 'step': 9164, 'epoch': 2} {'type': 'loss', 'content': 0.2662525475025177, 'timestamp': '2025-10-01 04:29:03.298419', 'step': 9165, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:03.352142', 'step': 9165, 'epoch': 2} {'type': 'loss', 'content': 0.13751210272312164, 'timestamp': '2025-10-01 04:29:03.354401', 'step': 9166, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:29:03.410017', 'step': 9166, 'epoch': 2} {'type': 'loss', 'content': 0.14442330598831177, 'timestamp': '2025-10-01 04:29:03.412109', 'step': 9167, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:03.470640', 'step': 9167, 'epoch': 2} {'type': 'loss', 'content': 0.1068301573395729, 'timestamp': '2025-10-01 04:29:03.476336', 'step': 9168, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:03.537058', 'step': 9168, 'epoch': 2} {'type': 'loss', 'content': 0.12101564556360245, 'timestamp': '2025-10-01 04:29:03.539142', 'step': 9169, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:03.593446', 'step': 9169, 'epoch': 2} {'type': 'loss', 'content': 0.1541660726070404, 'timestamp': '2025-10-01 04:29:03.595634', 'step': 9170, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:29:03.649260', 'step': 9170, 'epoch': 2} {'type': 'loss', 'content': 0.10238141566514969, 'timestamp': '2025-10-01 04:29:03.651443', 'step': 9171, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:03.709883', 'step': 9171, 'epoch': 2} {'type': 'loss', 'content': 0.16772156953811646, 'timestamp': '2025-10-01 04:29:03.715922', 'step': 9172, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:03.769600', 'step': 9172, 'epoch': 2} {'type': 'loss', 'content': 0.08156031370162964, 'timestamp': '2025-10-01 04:29:03.772029', 'step': 9173, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:03.825891', 'step': 9173, 'epoch': 2} {'type': 'loss', 'content': 0.24184280633926392, 'timestamp': '2025-10-01 04:29:03.828114', 'step': 9174, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:03.881428', 'step': 9174, 'epoch': 2} {'type': 'loss', 'content': 0.20850889384746552, 'timestamp': '2025-10-01 04:29:03.883671', 'step': 9175, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:03.939134', 'step': 9175, 'epoch': 2} {'type': 'loss', 'content': 0.08051357418298721, 'timestamp': '2025-10-01 04:29:03.946061', 'step': 9176, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:03.998876', 'step': 9176, 'epoch': 2} {'type': 'loss', 'content': 0.09497138857841492, 'timestamp': '2025-10-01 04:29:04.001559', 'step': 9177, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:04.059917', 'step': 9177, 'epoch': 2} {'type': 'loss', 'content': 0.1274936944246292, 'timestamp': '2025-10-01 04:29:04.061940', 'step': 9178, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:04.114912', 'step': 9178, 'epoch': 2} {'type': 'loss', 'content': 0.31247279047966003, 'timestamp': '2025-10-01 04:29:04.116971', 'step': 9179, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:04.169722', 'step': 9179, 'epoch': 2} {'type': 'loss', 'content': 0.08892407268285751, 'timestamp': '2025-10-01 04:29:04.175484', 'step': 9180, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:04.228052', 'step': 9180, 'epoch': 2} {'type': 'loss', 'content': 0.139097198843956, 'timestamp': '2025-10-01 04:29:04.236449', 'step': 9181, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:04.289732', 'step': 9181, 'epoch': 2} {'type': 'loss', 'content': 0.1318904161453247, 'timestamp': '2025-10-01 04:29:04.291782', 'step': 9182, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:04.344914', 'step': 9182, 'epoch': 2} {'type': 'loss', 'content': 0.15148332715034485, 'timestamp': '2025-10-01 04:29:04.347252', 'step': 9183, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:04.400223', 'step': 9183, 'epoch': 2} {'type': 'loss', 'content': 0.15737666189670563, 'timestamp': '2025-10-01 04:29:04.405971', 'step': 9184, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:04.461904', 'step': 9184, 'epoch': 2} {'type': 'loss', 'content': 0.07930696755647659, 'timestamp': '2025-10-01 04:29:04.464057', 'step': 9185, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:04.518082', 'step': 9185, 'epoch': 2} {'type': 'loss', 'content': 0.1266728639602661, 'timestamp': '2025-10-01 04:29:04.519887', 'step': 9186, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:04.572833', 'step': 9186, 'epoch': 2} {'type': 'loss', 'content': 0.1173645406961441, 'timestamp': '2025-10-01 04:29:04.574976', 'step': 9187, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:04.627321', 'step': 9187, 'epoch': 2} {'type': 'loss', 'content': 0.08515999466180801, 'timestamp': '2025-10-01 04:29:04.632976', 'step': 9188, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:04.685206', 'step': 9188, 'epoch': 2} {'type': 'loss', 'content': 0.18892042338848114, 'timestamp': '2025-10-01 04:29:04.690503', 'step': 9189, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:04.744059', 'step': 9189, 'epoch': 2} {'type': 'loss', 'content': 0.13717934489250183, 'timestamp': '2025-10-01 04:29:04.746225', 'step': 9190, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:04.799021', 'step': 9190, 'epoch': 2} {'type': 'loss', 'content': 0.06591611355543137, 'timestamp': '2025-10-01 04:29:04.801199', 'step': 9191, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:04.856131', 'step': 9191, 'epoch': 2} {'type': 'loss', 'content': 0.11970538645982742, 'timestamp': '2025-10-01 04:29:04.861605', 'step': 9192, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:29:04.914356', 'step': 9192, 'epoch': 2} {'type': 'loss', 'content': 0.21541109681129456, 'timestamp': '2025-10-01 04:29:04.916459', 'step': 9193, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:04.969553', 'step': 9193, 'epoch': 2} {'type': 'loss', 'content': 0.16651852428913116, 'timestamp': '2025-10-01 04:29:04.971618', 'step': 9194, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:05.024884', 'step': 9194, 'epoch': 2} {'type': 'loss', 'content': 0.13474467396736145, 'timestamp': '2025-10-01 04:29:05.027302', 'step': 9195, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:05.080865', 'step': 9195, 'epoch': 2} {'type': 'loss', 'content': 0.1458202451467514, 'timestamp': '2025-10-01 04:29:05.086467', 'step': 9196, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:05.139612', 'step': 9196, 'epoch': 2} {'type': 'loss', 'content': 0.0816497653722763, 'timestamp': '2025-10-01 04:29:05.143279', 'step': 9197, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:05.198986', 'step': 9197, 'epoch': 2} {'type': 'loss', 'content': 0.14849494397640228, 'timestamp': '2025-10-01 04:29:05.201202', 'step': 9198, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:05.256955', 'step': 9198, 'epoch': 2} {'type': 'loss', 'content': 0.14530599117279053, 'timestamp': '2025-10-01 04:29:05.259186', 'step': 9199, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:05.316265', 'step': 9199, 'epoch': 2} {'type': 'loss', 'content': 0.17474713921546936, 'timestamp': '2025-10-01 04:29:05.322723', 'step': 9200, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:05.375614', 'step': 9200, 'epoch': 2} {'type': 'loss', 'content': 0.13607120513916016, 'timestamp': '2025-10-01 04:29:05.377614', 'step': 9201, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:05.431239', 'step': 9201, 'epoch': 2} {'type': 'loss', 'content': 0.12241987138986588, 'timestamp': '2025-10-01 04:29:05.433405', 'step': 9202, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:05.486844', 'step': 9202, 'epoch': 2} {'type': 'loss', 'content': 0.10447634011507034, 'timestamp': '2025-10-01 04:29:05.489029', 'step': 9203, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:05.542104', 'step': 9203, 'epoch': 2} {'type': 'loss', 'content': 0.09288531541824341, 'timestamp': '2025-10-01 04:29:05.547843', 'step': 9204, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:05.601826', 'step': 9204, 'epoch': 2} {'type': 'loss', 'content': 0.171123206615448, 'timestamp': '2025-10-01 04:29:05.604031', 'step': 9205, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:05.657168', 'step': 9205, 'epoch': 2} {'type': 'loss', 'content': 0.12278646975755692, 'timestamp': '2025-10-01 04:29:05.659455', 'step': 9206, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:05.713181', 'step': 9206, 'epoch': 2} {'type': 'loss', 'content': 0.0792120024561882, 'timestamp': '2025-10-01 04:29:05.715910', 'step': 9207, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:05.773801', 'step': 9207, 'epoch': 2} {'type': 'loss', 'content': 0.12366944551467896, 'timestamp': '2025-10-01 04:29:05.779762', 'step': 9208, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:05.832820', 'step': 9208, 'epoch': 2} {'type': 'loss', 'content': 0.11406852304935455, 'timestamp': '2025-10-01 04:29:05.834890', 'step': 9209, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:05.889099', 'step': 9209, 'epoch': 2} {'type': 'loss', 'content': 0.12305555492639542, 'timestamp': '2025-10-01 04:29:05.891318', 'step': 9210, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:05.944848', 'step': 9210, 'epoch': 2} {'type': 'loss', 'content': 0.13441115617752075, 'timestamp': '2025-10-01 04:29:05.947440', 'step': 9211, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:06.000809', 'step': 9211, 'epoch': 2} {'type': 'loss', 'content': 0.18739648163318634, 'timestamp': '2025-10-01 04:29:06.006497', 'step': 9212, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:06.059960', 'step': 9212, 'epoch': 2} {'type': 'loss', 'content': 0.14508353173732758, 'timestamp': '2025-10-01 04:29:06.061807', 'step': 9213, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:06.114283', 'step': 9213, 'epoch': 2} {'type': 'loss', 'content': 0.17898188531398773, 'timestamp': '2025-10-01 04:29:06.116783', 'step': 9214, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:06.175778', 'step': 9214, 'epoch': 2} {'type': 'loss', 'content': 0.0764835998415947, 'timestamp': '2025-10-01 04:29:06.178274', 'step': 9215, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:06.231895', 'step': 9215, 'epoch': 2} {'type': 'loss', 'content': 0.08000041544437408, 'timestamp': '2025-10-01 04:29:06.237746', 'step': 9216, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:06.290460', 'step': 9216, 'epoch': 2} {'type': 'loss', 'content': 0.091944120824337, 'timestamp': '2025-10-01 04:29:06.292721', 'step': 9217, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:06.346275', 'step': 9217, 'epoch': 2} {'type': 'loss', 'content': 0.12496073544025421, 'timestamp': '2025-10-01 04:29:06.348668', 'step': 9218, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:06.403005', 'step': 9218, 'epoch': 2} {'type': 'loss', 'content': 0.11226984113454819, 'timestamp': '2025-10-01 04:29:06.404817', 'step': 9219, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:29:06.458391', 'step': 9219, 'epoch': 2} {'type': 'loss', 'content': 0.11087242513895035, 'timestamp': '2025-10-01 04:29:06.464128', 'step': 9220, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:06.516996', 'step': 9220, 'epoch': 2} {'type': 'loss', 'content': 0.10034438222646713, 'timestamp': '2025-10-01 04:29:06.519063', 'step': 9221, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:06.573008', 'step': 9221, 'epoch': 2} {'type': 'loss', 'content': 0.13170166313648224, 'timestamp': '2025-10-01 04:29:06.575152', 'step': 9222, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:06.629170', 'step': 9222, 'epoch': 2} {'type': 'loss', 'content': 0.11281921714544296, 'timestamp': '2025-10-01 04:29:06.631567', 'step': 9223, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:06.684764', 'step': 9223, 'epoch': 2} {'type': 'loss', 'content': 0.19018058478832245, 'timestamp': '2025-10-01 04:29:06.690966', 'step': 9224, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:06.751156', 'step': 9224, 'epoch': 2} {'type': 'loss', 'content': 0.09963654726743698, 'timestamp': '2025-10-01 04:29:06.753399', 'step': 9225, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:29:06.807905', 'step': 9225, 'epoch': 2} {'type': 'loss', 'content': 0.10215331614017487, 'timestamp': '2025-10-01 04:29:06.810124', 'step': 9226, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:06.865464', 'step': 9226, 'epoch': 2} {'type': 'loss', 'content': 0.08945240080356598, 'timestamp': '2025-10-01 04:29:06.867590', 'step': 9227, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:06.920393', 'step': 9227, 'epoch': 2} {'type': 'loss', 'content': 0.13217788934707642, 'timestamp': '2025-10-01 04:29:06.926338', 'step': 9228, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:06.980213', 'step': 9228, 'epoch': 2} {'type': 'loss', 'content': 0.11676604300737381, 'timestamp': '2025-10-01 04:29:06.990866', 'step': 9229, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:07.046294', 'step': 9229, 'epoch': 2} {'type': 'loss', 'content': 0.15388403832912445, 'timestamp': '2025-10-01 04:29:07.048614', 'step': 9230, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:07.106276', 'step': 9230, 'epoch': 2} {'type': 'loss', 'content': 0.1178990826010704, 'timestamp': '2025-10-01 04:29:07.108957', 'step': 9231, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:07.162837', 'step': 9231, 'epoch': 2} {'type': 'loss', 'content': 0.08409072458744049, 'timestamp': '2025-10-01 04:29:07.168998', 'step': 9232, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:07.221977', 'step': 9232, 'epoch': 2} {'type': 'loss', 'content': 0.15427573025226593, 'timestamp': '2025-10-01 04:29:07.226364', 'step': 9233, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:07.279569', 'step': 9233, 'epoch': 2} {'type': 'loss', 'content': 0.18655365705490112, 'timestamp': '2025-10-01 04:29:07.282131', 'step': 9234, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:07.335235', 'step': 9234, 'epoch': 2} {'type': 'loss', 'content': 0.1717187613248825, 'timestamp': '2025-10-01 04:29:07.337530', 'step': 9235, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:07.390638', 'step': 9235, 'epoch': 2} {'type': 'loss', 'content': 0.13916631042957306, 'timestamp': '2025-10-01 04:29:07.396371', 'step': 9236, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:07.449314', 'step': 9236, 'epoch': 2} {'type': 'loss', 'content': 0.16902421414852142, 'timestamp': '2025-10-01 04:29:07.451923', 'step': 9237, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:07.508574', 'step': 9237, 'epoch': 2} {'type': 'loss', 'content': 0.17421503365039825, 'timestamp': '2025-10-01 04:29:07.511048', 'step': 9238, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:07.566157', 'step': 9238, 'epoch': 2} {'type': 'loss', 'content': 0.1347789317369461, 'timestamp': '2025-10-01 04:29:07.568899', 'step': 9239, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:07.622825', 'step': 9239, 'epoch': 2} {'type': 'loss', 'content': 0.1645331084728241, 'timestamp': '2025-10-01 04:29:07.629468', 'step': 9240, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:07.683004', 'step': 9240, 'epoch': 2} {'type': 'loss', 'content': 0.21687105298042297, 'timestamp': '2025-10-01 04:29:07.685319', 'step': 9241, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:07.739986', 'step': 9241, 'epoch': 2} {'type': 'loss', 'content': 0.1277775913476944, 'timestamp': '2025-10-01 04:29:07.742419', 'step': 9242, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:07.796840', 'step': 9242, 'epoch': 2} {'type': 'loss', 'content': 0.13050919771194458, 'timestamp': '2025-10-01 04:29:07.799091', 'step': 9243, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:07.853491', 'step': 9243, 'epoch': 2} {'type': 'loss', 'content': 0.15657490491867065, 'timestamp': '2025-10-01 04:29:07.866690', 'step': 9244, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:07.930725', 'step': 9244, 'epoch': 2} {'type': 'loss', 'content': 0.1301969736814499, 'timestamp': '2025-10-01 04:29:07.933054', 'step': 9245, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:07.987439', 'step': 9245, 'epoch': 2} {'type': 'loss', 'content': 0.06563763320446014, 'timestamp': '2025-10-01 04:29:07.989808', 'step': 9246, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:08.044027', 'step': 9246, 'epoch': 2} {'type': 'loss', 'content': 0.09927252680063248, 'timestamp': '2025-10-01 04:29:08.046601', 'step': 9247, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:08.100908', 'step': 9247, 'epoch': 2} {'type': 'loss', 'content': 0.09281378984451294, 'timestamp': '2025-10-01 04:29:08.107435', 'step': 9248, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:08.168127', 'step': 9248, 'epoch': 2} {'type': 'loss', 'content': 0.11908166110515594, 'timestamp': '2025-10-01 04:29:08.170305', 'step': 9249, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:08.224710', 'step': 9249, 'epoch': 2} {'type': 'loss', 'content': 0.17448607087135315, 'timestamp': '2025-10-01 04:29:08.227341', 'step': 9250, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:08.281637', 'step': 9250, 'epoch': 2} {'type': 'loss', 'content': 0.11643113195896149, 'timestamp': '2025-10-01 04:29:08.283947', 'step': 9251, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:08.338467', 'step': 9251, 'epoch': 2} {'type': 'loss', 'content': 0.12975390255451202, 'timestamp': '2025-10-01 04:29:08.347013', 'step': 9252, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:08.411455', 'step': 9252, 'epoch': 2} {'type': 'loss', 'content': 0.09287437051534653, 'timestamp': '2025-10-01 04:29:08.414060', 'step': 9253, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:08.469168', 'step': 9253, 'epoch': 2} {'type': 'loss', 'content': 0.1712399125099182, 'timestamp': '2025-10-01 04:29:08.471893', 'step': 9254, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:08.526748', 'step': 9254, 'epoch': 2} {'type': 'loss', 'content': 0.06709792464971542, 'timestamp': '2025-10-01 04:29:08.529519', 'step': 9255, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:08.583736', 'step': 9255, 'epoch': 2} {'type': 'loss', 'content': 0.20394760370254517, 'timestamp': '2025-10-01 04:29:08.590111', 'step': 9256, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:08.644298', 'step': 9256, 'epoch': 2} {'type': 'loss', 'content': 0.23948408663272858, 'timestamp': '2025-10-01 04:29:08.650045', 'step': 9257, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:29:08.717008', 'step': 9257, 'epoch': 2} {'type': 'loss', 'content': 0.2090183049440384, 'timestamp': '2025-10-01 04:29:08.727021', 'step': 9258, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:08.804100', 'step': 9258, 'epoch': 2} {'type': 'loss', 'content': 0.14904412627220154, 'timestamp': '2025-10-01 04:29:08.808697', 'step': 9259, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:08.888876', 'step': 9259, 'epoch': 2} {'type': 'loss', 'content': 0.11669249087572098, 'timestamp': '2025-10-01 04:29:08.896142', 'step': 9260, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:08.972366', 'step': 9260, 'epoch': 2} {'type': 'loss', 'content': 0.1383824348449707, 'timestamp': '2025-10-01 04:29:08.976974', 'step': 9261, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:09.059780', 'step': 9261, 'epoch': 2} {'type': 'loss', 'content': 0.06462371349334717, 'timestamp': '2025-10-01 04:29:09.070307', 'step': 9262, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:09.142666', 'step': 9262, 'epoch': 2} {'type': 'loss', 'content': 0.1044575646519661, 'timestamp': '2025-10-01 04:29:09.152641', 'step': 9263, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:09.234789', 'step': 9263, 'epoch': 2} {'type': 'loss', 'content': 0.13678953051567078, 'timestamp': '2025-10-01 04:29:09.246359', 'step': 9264, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:09.309652', 'step': 9264, 'epoch': 2} {'type': 'loss', 'content': 0.17836637794971466, 'timestamp': '2025-10-01 04:29:09.316092', 'step': 9265, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:09.389502', 'step': 9265, 'epoch': 2} {'type': 'loss', 'content': 0.1866450011730194, 'timestamp': '2025-10-01 04:29:09.403192', 'step': 9266, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:09.494539', 'step': 9266, 'epoch': 2} {'type': 'loss', 'content': 0.13096757233142853, 'timestamp': '2025-10-01 04:29:09.507800', 'step': 9267, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:29:09.589635', 'step': 9267, 'epoch': 2} {'type': 'loss', 'content': 0.15685023367404938, 'timestamp': '2025-10-01 04:29:09.609043', 'step': 9268, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:09.692244', 'step': 9268, 'epoch': 2} {'type': 'loss', 'content': 0.12464923411607742, 'timestamp': '2025-10-01 04:29:09.696844', 'step': 9269, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:29:09.761795', 'step': 9269, 'epoch': 2} {'type': 'loss', 'content': 0.13929738104343414, 'timestamp': '2025-10-01 04:29:09.763713', 'step': 9270, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:09.817369', 'step': 9270, 'epoch': 2} {'type': 'loss', 'content': 0.08214227110147476, 'timestamp': '2025-10-01 04:29:09.819670', 'step': 9271, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:09.873720', 'step': 9271, 'epoch': 2} {'type': 'loss', 'content': 0.13964244723320007, 'timestamp': '2025-10-01 04:29:09.879315', 'step': 9272, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:09.932054', 'step': 9272, 'epoch': 2} {'type': 'loss', 'content': 0.1276014894247055, 'timestamp': '2025-10-01 04:29:09.934269', 'step': 9273, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:29:09.989768', 'step': 9273, 'epoch': 2} {'type': 'loss', 'content': 0.06655168533325195, 'timestamp': '2025-10-01 04:29:09.992008', 'step': 9274, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:10.046496', 'step': 9274, 'epoch': 2} {'type': 'loss', 'content': 0.08018461614847183, 'timestamp': '2025-10-01 04:29:10.048903', 'step': 9275, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:10.102833', 'step': 9275, 'epoch': 2} {'type': 'loss', 'content': 0.15052908658981323, 'timestamp': '2025-10-01 04:29:10.108593', 'step': 9276, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:10.162066', 'step': 9276, 'epoch': 2} {'type': 'loss', 'content': 0.12086556106805801, 'timestamp': '2025-10-01 04:29:10.164271', 'step': 9277, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:10.217574', 'step': 9277, 'epoch': 2} {'type': 'loss', 'content': 0.06392661482095718, 'timestamp': '2025-10-01 04:29:10.219910', 'step': 9278, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:10.273833', 'step': 9278, 'epoch': 2} {'type': 'loss', 'content': 0.17041000723838806, 'timestamp': '2025-10-01 04:29:10.277781', 'step': 9279, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:10.330462', 'step': 9279, 'epoch': 2} {'type': 'loss', 'content': 0.15697529911994934, 'timestamp': '2025-10-01 04:29:10.336191', 'step': 9280, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:10.389003', 'step': 9280, 'epoch': 2} {'type': 'loss', 'content': 0.07687042653560638, 'timestamp': '2025-10-01 04:29:10.392938', 'step': 9281, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:10.447473', 'step': 9281, 'epoch': 2} {'type': 'loss', 'content': 0.13111360371112823, 'timestamp': '2025-10-01 04:29:10.449833', 'step': 9282, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:10.503736', 'step': 9282, 'epoch': 2} {'type': 'loss', 'content': 0.10975901037454605, 'timestamp': '2025-10-01 04:29:10.506054', 'step': 9283, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:10.566677', 'step': 9283, 'epoch': 2} {'type': 'loss', 'content': 0.12774533033370972, 'timestamp': '2025-10-01 04:29:10.572871', 'step': 9284, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:29:10.625958', 'step': 9284, 'epoch': 2} {'type': 'loss', 'content': 0.22365258634090424, 'timestamp': '2025-10-01 04:29:10.628104', 'step': 9285, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:10.681406', 'step': 9285, 'epoch': 2} {'type': 'loss', 'content': 0.12312966585159302, 'timestamp': '2025-10-01 04:29:10.683454', 'step': 9286, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:10.736903', 'step': 9286, 'epoch': 2} {'type': 'loss', 'content': 0.16301842033863068, 'timestamp': '2025-10-01 04:29:10.740521', 'step': 9287, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:10.807704', 'step': 9287, 'epoch': 2} {'type': 'loss', 'content': 0.06981940567493439, 'timestamp': '2025-10-01 04:29:10.813807', 'step': 9288, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:10.867735', 'step': 9288, 'epoch': 2} {'type': 'loss', 'content': 0.10164069384336472, 'timestamp': '2025-10-01 04:29:10.869951', 'step': 9289, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:10.923862', 'step': 9289, 'epoch': 2} {'type': 'loss', 'content': 0.05147860199213028, 'timestamp': '2025-10-01 04:29:10.926030', 'step': 9290, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:10.979348', 'step': 9290, 'epoch': 2} {'type': 'loss', 'content': 0.14745670557022095, 'timestamp': '2025-10-01 04:29:10.982208', 'step': 9291, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:11.035229', 'step': 9291, 'epoch': 2} {'type': 'loss', 'content': 0.15422222018241882, 'timestamp': '2025-10-01 04:29:11.040898', 'step': 9292, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:11.093640', 'step': 9292, 'epoch': 2} {'type': 'loss', 'content': 0.17847396433353424, 'timestamp': '2025-10-01 04:29:11.095825', 'step': 9293, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:11.149535', 'step': 9293, 'epoch': 2} {'type': 'loss', 'content': 0.18418830633163452, 'timestamp': '2025-10-01 04:29:11.151737', 'step': 9294, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:11.204879', 'step': 9294, 'epoch': 2} {'type': 'loss', 'content': 0.06492520123720169, 'timestamp': '2025-10-01 04:29:11.207084', 'step': 9295, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:11.261195', 'step': 9295, 'epoch': 2} {'type': 'loss', 'content': 0.16524922847747803, 'timestamp': '2025-10-01 04:29:11.266809', 'step': 9296, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:11.323811', 'step': 9296, 'epoch': 2} {'type': 'loss', 'content': 0.063322052359581, 'timestamp': '2025-10-01 04:29:11.326030', 'step': 9297, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:29:11.379073', 'step': 9297, 'epoch': 2} {'type': 'loss', 'content': 0.1956794112920761, 'timestamp': '2025-10-01 04:29:11.381274', 'step': 9298, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:11.433959', 'step': 9298, 'epoch': 2} {'type': 'loss', 'content': 0.1679268181324005, 'timestamp': '2025-10-01 04:29:11.436195', 'step': 9299, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:11.489122', 'step': 9299, 'epoch': 2} {'type': 'loss', 'content': 0.1986771523952484, 'timestamp': '2025-10-01 04:29:11.494807', 'step': 9300, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:11.547634', 'step': 9300, 'epoch': 2} {'type': 'loss', 'content': 0.07003515958786011, 'timestamp': '2025-10-01 04:29:11.549791', 'step': 9301, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:11.602522', 'step': 9301, 'epoch': 2} {'type': 'loss', 'content': 0.11948682367801666, 'timestamp': '2025-10-01 04:29:11.604785', 'step': 9302, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:11.658128', 'step': 9302, 'epoch': 2} {'type': 'loss', 'content': 0.12166434526443481, 'timestamp': '2025-10-01 04:29:11.660296', 'step': 9303, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:11.714246', 'step': 9303, 'epoch': 2} {'type': 'loss', 'content': 0.1514325588941574, 'timestamp': '2025-10-01 04:29:11.719846', 'step': 9304, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:11.772405', 'step': 9304, 'epoch': 2} {'type': 'loss', 'content': 0.12283043563365936, 'timestamp': '2025-10-01 04:29:11.774611', 'step': 9305, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:11.832756', 'step': 9305, 'epoch': 2} {'type': 'loss', 'content': 0.15941745042800903, 'timestamp': '2025-10-01 04:29:11.834762', 'step': 9306, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:11.887949', 'step': 9306, 'epoch': 2} {'type': 'loss', 'content': 0.0990460216999054, 'timestamp': '2025-10-01 04:29:11.889928', 'step': 9307, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:11.942305', 'step': 9307, 'epoch': 2} {'type': 'loss', 'content': 0.1391754001379013, 'timestamp': '2025-10-01 04:29:11.948034', 'step': 9308, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:12.001248', 'step': 9308, 'epoch': 2} {'type': 'loss', 'content': 0.13862042129039764, 'timestamp': '2025-10-01 04:29:12.003521', 'step': 9309, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:12.056454', 'step': 9309, 'epoch': 2} {'type': 'loss', 'content': 0.09841340780258179, 'timestamp': '2025-10-01 04:29:12.058555', 'step': 9310, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:12.111727', 'step': 9310, 'epoch': 2} {'type': 'loss', 'content': 0.10263365507125854, 'timestamp': '2025-10-01 04:29:12.113908', 'step': 9311, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:12.166684', 'step': 9311, 'epoch': 2} {'type': 'loss', 'content': 0.174286350607872, 'timestamp': '2025-10-01 04:29:12.172566', 'step': 9312, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:12.225813', 'step': 9312, 'epoch': 2} {'type': 'loss', 'content': 0.16439449787139893, 'timestamp': '2025-10-01 04:29:12.227836', 'step': 9313, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:12.284186', 'step': 9313, 'epoch': 2} {'type': 'loss', 'content': 0.10649538785219193, 'timestamp': '2025-10-01 04:29:12.286230', 'step': 9314, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:12.339455', 'step': 9314, 'epoch': 2} {'type': 'loss', 'content': 0.11337350308895111, 'timestamp': '2025-10-01 04:29:12.341504', 'step': 9315, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:12.394695', 'step': 9315, 'epoch': 2} {'type': 'loss', 'content': 0.09836114197969437, 'timestamp': '2025-10-01 04:29:12.400457', 'step': 9316, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:12.452930', 'step': 9316, 'epoch': 2} {'type': 'loss', 'content': 0.10530659556388855, 'timestamp': '2025-10-01 04:29:12.455018', 'step': 9317, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:12.508635', 'step': 9317, 'epoch': 2} {'type': 'loss', 'content': 0.059864405542612076, 'timestamp': '2025-10-01 04:29:12.510661', 'step': 9318, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:12.564320', 'step': 9318, 'epoch': 2} {'type': 'loss', 'content': 0.07887257635593414, 'timestamp': '2025-10-01 04:29:12.566369', 'step': 9319, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:12.619847', 'step': 9319, 'epoch': 2} {'type': 'loss', 'content': 0.11375285685062408, 'timestamp': '2025-10-01 04:29:12.625662', 'step': 9320, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-01 04:29:25.806261', 'step': 9320, 'epoch': 2} {'type': 'pplx', 'content': 14276.027184625354, 'timestamp': '2025-10-01 04:29:25.809570', 'step': 9320, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:25.863805', 'step': 9320, 'epoch': 2} {'type': 'loss', 'content': 0.11667057126760483, 'timestamp': '2025-10-01 04:29:25.866177', 'step': 9321, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:25.921890', 'step': 9321, 'epoch': 2} {'type': 'loss', 'content': 0.14466339349746704, 'timestamp': '2025-10-01 04:29:25.924610', 'step': 9322, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:25.980381', 'step': 9322, 'epoch': 2} {'type': 'loss', 'content': 0.14166375994682312, 'timestamp': '2025-10-01 04:29:25.983199', 'step': 9323, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:26.040259', 'step': 9323, 'epoch': 2} {'type': 'loss', 'content': 0.21909716725349426, 'timestamp': '2025-10-01 04:29:26.046684', 'step': 9324, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:26.100177', 'step': 9324, 'epoch': 2} {'type': 'loss', 'content': 0.15561431646347046, 'timestamp': '2025-10-01 04:29:26.103223', 'step': 9325, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:26.166411', 'step': 9325, 'epoch': 2} {'type': 'loss', 'content': 0.17122849822044373, 'timestamp': '2025-10-01 04:29:26.168360', 'step': 9326, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:26.222812', 'step': 9326, 'epoch': 2} {'type': 'loss', 'content': 0.07145319879055023, 'timestamp': '2025-10-01 04:29:26.224997', 'step': 9327, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:26.289042', 'step': 9327, 'epoch': 2} {'type': 'loss', 'content': 0.19310294091701508, 'timestamp': '2025-10-01 04:29:26.295209', 'step': 9328, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:26.365480', 'step': 9328, 'epoch': 2} {'type': 'loss', 'content': 0.08615490049123764, 'timestamp': '2025-10-01 04:29:26.367689', 'step': 9329, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:26.420734', 'step': 9329, 'epoch': 2} {'type': 'loss', 'content': 0.12133324146270752, 'timestamp': '2025-10-01 04:29:26.423571', 'step': 9330, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:26.480068', 'step': 9330, 'epoch': 2} {'type': 'loss', 'content': 0.140899658203125, 'timestamp': '2025-10-01 04:29:26.482278', 'step': 9331, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:26.538965', 'step': 9331, 'epoch': 2} {'type': 'loss', 'content': 0.14790380001068115, 'timestamp': '2025-10-01 04:29:26.544625', 'step': 9332, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:26.610375', 'step': 9332, 'epoch': 2} {'type': 'loss', 'content': 0.14324218034744263, 'timestamp': '2025-10-01 04:29:26.612833', 'step': 9333, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:26.665589', 'step': 9333, 'epoch': 2} {'type': 'loss', 'content': 0.09056936949491501, 'timestamp': '2025-10-01 04:29:26.667838', 'step': 9334, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:26.722135', 'step': 9334, 'epoch': 2} {'type': 'loss', 'content': 0.11321859806776047, 'timestamp': '2025-10-01 04:29:26.724303', 'step': 9335, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:26.778224', 'step': 9335, 'epoch': 2} {'type': 'loss', 'content': 0.10887617617845535, 'timestamp': '2025-10-01 04:29:26.784006', 'step': 9336, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:26.841424', 'step': 9336, 'epoch': 2} {'type': 'loss', 'content': 0.18938851356506348, 'timestamp': '2025-10-01 04:29:26.843473', 'step': 9337, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:26.905997', 'step': 9337, 'epoch': 2} {'type': 'loss', 'content': 0.06994860619306564, 'timestamp': '2025-10-01 04:29:26.908921', 'step': 9338, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:26.964083', 'step': 9338, 'epoch': 2} {'type': 'loss', 'content': 0.09600295126438141, 'timestamp': '2025-10-01 04:29:26.966356', 'step': 9339, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:27.019746', 'step': 9339, 'epoch': 2} {'type': 'loss', 'content': 0.10426695644855499, 'timestamp': '2025-10-01 04:29:27.025390', 'step': 9340, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:27.079277', 'step': 9340, 'epoch': 2} {'type': 'loss', 'content': 0.16499599814414978, 'timestamp': '2025-10-01 04:29:27.081345', 'step': 9341, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:27.144622', 'step': 9341, 'epoch': 2} {'type': 'loss', 'content': 0.061777930706739426, 'timestamp': '2025-10-01 04:29:27.148080', 'step': 9342, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:27.201692', 'step': 9342, 'epoch': 2} {'type': 'loss', 'content': 0.16810795664787292, 'timestamp': '2025-10-01 04:29:27.203828', 'step': 9343, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:27.270013', 'step': 9343, 'epoch': 2} {'type': 'loss', 'content': 0.13134413957595825, 'timestamp': '2025-10-01 04:29:27.275660', 'step': 9344, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:27.330004', 'step': 9344, 'epoch': 2} {'type': 'loss', 'content': 0.116093210875988, 'timestamp': '2025-10-01 04:29:27.338714', 'step': 9345, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:27.392247', 'step': 9345, 'epoch': 2} {'type': 'loss', 'content': 0.11931397020816803, 'timestamp': '2025-10-01 04:29:27.394268', 'step': 9346, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:27.447570', 'step': 9346, 'epoch': 2} {'type': 'loss', 'content': 0.10885167121887207, 'timestamp': '2025-10-01 04:29:27.450537', 'step': 9347, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:27.504410', 'step': 9347, 'epoch': 2} {'type': 'loss', 'content': 0.14926229417324066, 'timestamp': '2025-10-01 04:29:27.509988', 'step': 9348, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:27.572185', 'step': 9348, 'epoch': 2} {'type': 'loss', 'content': 0.0856858640909195, 'timestamp': '2025-10-01 04:29:27.574418', 'step': 9349, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:27.638591', 'step': 9349, 'epoch': 2} {'type': 'loss', 'content': 0.1948411762714386, 'timestamp': '2025-10-01 04:29:27.640913', 'step': 9350, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:29:27.704416', 'step': 9350, 'epoch': 2} {'type': 'loss', 'content': 0.11240402609109879, 'timestamp': '2025-10-01 04:29:27.706640', 'step': 9351, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:27.767025', 'step': 9351, 'epoch': 2} {'type': 'loss', 'content': 0.11018240451812744, 'timestamp': '2025-10-01 04:29:27.773584', 'step': 9352, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:27.827158', 'step': 9352, 'epoch': 2} {'type': 'loss', 'content': 0.08920469880104065, 'timestamp': '2025-10-01 04:29:27.829228', 'step': 9353, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:27.890524', 'step': 9353, 'epoch': 2} {'type': 'loss', 'content': 0.12344618141651154, 'timestamp': '2025-10-01 04:29:27.892654', 'step': 9354, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:27.946309', 'step': 9354, 'epoch': 2} {'type': 'loss', 'content': 0.1336173117160797, 'timestamp': '2025-10-01 04:29:27.948370', 'step': 9355, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:28.002119', 'step': 9355, 'epoch': 2} {'type': 'loss', 'content': 0.1606791764497757, 'timestamp': '2025-10-01 04:29:28.007922', 'step': 9356, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:28.062137', 'step': 9356, 'epoch': 2} {'type': 'loss', 'content': 0.10543052107095718, 'timestamp': '2025-10-01 04:29:28.064185', 'step': 9357, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:28.117179', 'step': 9357, 'epoch': 2} {'type': 'loss', 'content': 0.07876667380332947, 'timestamp': '2025-10-01 04:29:28.119381', 'step': 9358, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:28.172984', 'step': 9358, 'epoch': 2} {'type': 'loss', 'content': 0.10219071805477142, 'timestamp': '2025-10-01 04:29:28.174941', 'step': 9359, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:28.228196', 'step': 9359, 'epoch': 2} {'type': 'loss', 'content': 0.1218085065484047, 'timestamp': '2025-10-01 04:29:28.233984', 'step': 9360, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:28.304485', 'step': 9360, 'epoch': 2} {'type': 'loss', 'content': 0.13573990762233734, 'timestamp': '2025-10-01 04:29:28.307298', 'step': 9361, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:28.378229', 'step': 9361, 'epoch': 2} {'type': 'loss', 'content': 0.19047856330871582, 'timestamp': '2025-10-01 04:29:28.380952', 'step': 9362, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:28.434047', 'step': 9362, 'epoch': 2} {'type': 'loss', 'content': 0.21919547021389008, 'timestamp': '2025-10-01 04:29:28.436766', 'step': 9363, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:28.490470', 'step': 9363, 'epoch': 2} {'type': 'loss', 'content': 0.20959673821926117, 'timestamp': '2025-10-01 04:29:28.496535', 'step': 9364, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:28.550161', 'step': 9364, 'epoch': 2} {'type': 'loss', 'content': 0.13078083097934723, 'timestamp': '2025-10-01 04:29:28.553280', 'step': 9365, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:28.608116', 'step': 9365, 'epoch': 2} {'type': 'loss', 'content': 0.1875227838754654, 'timestamp': '2025-10-01 04:29:28.610836', 'step': 9366, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:28.664894', 'step': 9366, 'epoch': 2} {'type': 'loss', 'content': 0.15873488783836365, 'timestamp': '2025-10-01 04:29:28.680413', 'step': 9367, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:28.748776', 'step': 9367, 'epoch': 2} {'type': 'loss', 'content': 0.16254833340644836, 'timestamp': '2025-10-01 04:29:28.755552', 'step': 9368, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:28.809155', 'step': 9368, 'epoch': 2} {'type': 'loss', 'content': 0.16529691219329834, 'timestamp': '2025-10-01 04:29:28.811242', 'step': 9369, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:28.864968', 'step': 9369, 'epoch': 2} {'type': 'loss', 'content': 0.1331941783428192, 'timestamp': '2025-10-01 04:29:28.867124', 'step': 9370, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:28.930809', 'step': 9370, 'epoch': 2} {'type': 'loss', 'content': 0.12107660621404648, 'timestamp': '2025-10-01 04:29:28.932859', 'step': 9371, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:28.987359', 'step': 9371, 'epoch': 2} {'type': 'loss', 'content': 0.1450512409210205, 'timestamp': '2025-10-01 04:29:28.994121', 'step': 9372, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:29.057944', 'step': 9372, 'epoch': 2} {'type': 'loss', 'content': 0.10494005680084229, 'timestamp': '2025-10-01 04:29:29.060254', 'step': 9373, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:29.114813', 'step': 9373, 'epoch': 2} {'type': 'loss', 'content': 0.12091034650802612, 'timestamp': '2025-10-01 04:29:29.116969', 'step': 9374, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:29:29.186323', 'step': 9374, 'epoch': 2} {'type': 'loss', 'content': 0.1606442630290985, 'timestamp': '2025-10-01 04:29:29.188340', 'step': 9375, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:29.242851', 'step': 9375, 'epoch': 2} {'type': 'loss', 'content': 0.13424450159072876, 'timestamp': '2025-10-01 04:29:29.259366', 'step': 9376, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:29.333884', 'step': 9376, 'epoch': 2} {'type': 'loss', 'content': 0.12258145958185196, 'timestamp': '2025-10-01 04:29:29.336056', 'step': 9377, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:29.400825', 'step': 9377, 'epoch': 2} {'type': 'loss', 'content': 0.17184366285800934, 'timestamp': '2025-10-01 04:29:29.404085', 'step': 9378, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:29.458844', 'step': 9378, 'epoch': 2} {'type': 'loss', 'content': 0.1188691109418869, 'timestamp': '2025-10-01 04:29:29.461063', 'step': 9379, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:29.514611', 'step': 9379, 'epoch': 2} {'type': 'loss', 'content': 0.12298699468374252, 'timestamp': '2025-10-01 04:29:29.520882', 'step': 9380, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:29.579159', 'step': 9380, 'epoch': 2} {'type': 'loss', 'content': 0.18570920825004578, 'timestamp': '2025-10-01 04:29:29.581692', 'step': 9381, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:29.645486', 'step': 9381, 'epoch': 2} {'type': 'loss', 'content': 0.08385726064443588, 'timestamp': '2025-10-01 04:29:29.647702', 'step': 9382, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:29.711308', 'step': 9382, 'epoch': 2} {'type': 'loss', 'content': 0.11821569502353668, 'timestamp': '2025-10-01 04:29:29.713812', 'step': 9383, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:29.766768', 'step': 9383, 'epoch': 2} {'type': 'loss', 'content': 0.18591028451919556, 'timestamp': '2025-10-01 04:29:29.781791', 'step': 9384, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:29.843700', 'step': 9384, 'epoch': 2} {'type': 'loss', 'content': 0.18723830580711365, 'timestamp': '2025-10-01 04:29:29.853888', 'step': 9385, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:29.907350', 'step': 9385, 'epoch': 2} {'type': 'loss', 'content': 0.19030803442001343, 'timestamp': '2025-10-01 04:29:29.909868', 'step': 9386, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:29:29.963453', 'step': 9386, 'epoch': 2} {'type': 'loss', 'content': 0.0707872062921524, 'timestamp': '2025-10-01 04:29:29.965620', 'step': 9387, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:30.026063', 'step': 9387, 'epoch': 2} {'type': 'loss', 'content': 0.16925764083862305, 'timestamp': '2025-10-01 04:29:30.031969', 'step': 9388, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:30.084824', 'step': 9388, 'epoch': 2} {'type': 'loss', 'content': 0.14757980406284332, 'timestamp': '2025-10-01 04:29:30.087760', 'step': 9389, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:30.141069', 'step': 9389, 'epoch': 2} {'type': 'loss', 'content': 0.11860008537769318, 'timestamp': '2025-10-01 04:29:30.146649', 'step': 9390, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:29:30.200655', 'step': 9390, 'epoch': 2} {'type': 'loss', 'content': 0.17310753464698792, 'timestamp': '2025-10-01 04:29:30.205634', 'step': 9391, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:30.259498', 'step': 9391, 'epoch': 2} {'type': 'loss', 'content': 0.12259596586227417, 'timestamp': '2025-10-01 04:29:30.265658', 'step': 9392, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:30.328461', 'step': 9392, 'epoch': 2} {'type': 'loss', 'content': 0.1895582228899002, 'timestamp': '2025-10-01 04:29:30.330692', 'step': 9393, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:30.385232', 'step': 9393, 'epoch': 2} {'type': 'loss', 'content': 0.1360437572002411, 'timestamp': '2025-10-01 04:29:30.387428', 'step': 9394, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:30.448263', 'step': 9394, 'epoch': 2} {'type': 'loss', 'content': 0.1035245805978775, 'timestamp': '2025-10-01 04:29:30.450618', 'step': 9395, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:30.516032', 'step': 9395, 'epoch': 2} {'type': 'loss', 'content': 0.1893913745880127, 'timestamp': '2025-10-01 04:29:30.522986', 'step': 9396, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:30.581078', 'step': 9396, 'epoch': 2} {'type': 'loss', 'content': 0.09849987179040909, 'timestamp': '2025-10-01 04:29:30.583165', 'step': 9397, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:30.641244', 'step': 9397, 'epoch': 2} {'type': 'loss', 'content': 0.16335465013980865, 'timestamp': '2025-10-01 04:29:30.643434', 'step': 9398, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:30.702347', 'step': 9398, 'epoch': 2} {'type': 'loss', 'content': 0.17487648129463196, 'timestamp': '2025-10-01 04:29:30.704487', 'step': 9399, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:30.762803', 'step': 9399, 'epoch': 2} {'type': 'loss', 'content': 0.15199290215969086, 'timestamp': '2025-10-01 04:29:30.777047', 'step': 9400, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:30.835461', 'step': 9400, 'epoch': 2} {'type': 'loss', 'content': 0.21948465704917908, 'timestamp': '2025-10-01 04:29:30.845682', 'step': 9401, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:30.904191', 'step': 9401, 'epoch': 2} {'type': 'loss', 'content': 0.15849921107292175, 'timestamp': '2025-10-01 04:29:30.906502', 'step': 9402, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:30.964726', 'step': 9402, 'epoch': 2} {'type': 'loss', 'content': 0.11078684777021408, 'timestamp': '2025-10-01 04:29:30.966928', 'step': 9403, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:31.022372', 'step': 9403, 'epoch': 2} {'type': 'loss', 'content': 0.21663768589496613, 'timestamp': '2025-10-01 04:29:31.029183', 'step': 9404, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:31.083662', 'step': 9404, 'epoch': 2} {'type': 'loss', 'content': 0.21188828349113464, 'timestamp': '2025-10-01 04:29:31.085800', 'step': 9405, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:31.141638', 'step': 9405, 'epoch': 2} {'type': 'loss', 'content': 0.10943684726953506, 'timestamp': '2025-10-01 04:29:31.143772', 'step': 9406, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:31.197271', 'step': 9406, 'epoch': 2} {'type': 'loss', 'content': 0.15013983845710754, 'timestamp': '2025-10-01 04:29:31.200932', 'step': 9407, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:31.253951', 'step': 9407, 'epoch': 2} {'type': 'loss', 'content': 0.14518128335475922, 'timestamp': '2025-10-01 04:29:31.261059', 'step': 9408, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:31.325475', 'step': 9408, 'epoch': 2} {'type': 'loss', 'content': 0.25941023230552673, 'timestamp': '2025-10-01 04:29:31.339285', 'step': 9409, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:31.405434', 'step': 9409, 'epoch': 2} {'type': 'loss', 'content': 0.12100904434919357, 'timestamp': '2025-10-01 04:29:31.407558', 'step': 9410, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:31.462383', 'step': 9410, 'epoch': 2} {'type': 'loss', 'content': 0.24504926800727844, 'timestamp': '2025-10-01 04:29:31.464566', 'step': 9411, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:31.517317', 'step': 9411, 'epoch': 2} {'type': 'loss', 'content': 0.09353767335414886, 'timestamp': '2025-10-01 04:29:31.525454', 'step': 9412, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:29:31.578760', 'step': 9412, 'epoch': 2} {'type': 'loss', 'content': 0.13237108290195465, 'timestamp': '2025-10-01 04:29:31.580951', 'step': 9413, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:31.634271', 'step': 9413, 'epoch': 2} {'type': 'loss', 'content': 0.08423974364995956, 'timestamp': '2025-10-01 04:29:31.636616', 'step': 9414, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:31.689865', 'step': 9414, 'epoch': 2} {'type': 'loss', 'content': 0.10664425045251846, 'timestamp': '2025-10-01 04:29:31.691851', 'step': 9415, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:31.745427', 'step': 9415, 'epoch': 2} {'type': 'loss', 'content': 0.11531262844800949, 'timestamp': '2025-10-01 04:29:31.751284', 'step': 9416, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:31.804318', 'step': 9416, 'epoch': 2} {'type': 'loss', 'content': 0.1329139918088913, 'timestamp': '2025-10-01 04:29:31.806497', 'step': 9417, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:31.860384', 'step': 9417, 'epoch': 2} {'type': 'loss', 'content': 0.11966176331043243, 'timestamp': '2025-10-01 04:29:31.862602', 'step': 9418, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:31.928965', 'step': 9418, 'epoch': 2} {'type': 'loss', 'content': 0.11336357891559601, 'timestamp': '2025-10-01 04:29:31.932225', 'step': 9419, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:31.986590', 'step': 9419, 'epoch': 2} {'type': 'loss', 'content': 0.1521548628807068, 'timestamp': '2025-10-01 04:29:31.992852', 'step': 9420, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:32.047193', 'step': 9420, 'epoch': 2} {'type': 'loss', 'content': 0.07931272685527802, 'timestamp': '2025-10-01 04:29:32.049454', 'step': 9421, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:32.104692', 'step': 9421, 'epoch': 2} {'type': 'loss', 'content': 0.09534015506505966, 'timestamp': '2025-10-01 04:29:32.107867', 'step': 9422, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:32.165336', 'step': 9422, 'epoch': 2} {'type': 'loss', 'content': 0.0828133225440979, 'timestamp': '2025-10-01 04:29:32.180750', 'step': 9423, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:32.235145', 'step': 9423, 'epoch': 2} {'type': 'loss', 'content': 0.09488240629434586, 'timestamp': '2025-10-01 04:29:32.241083', 'step': 9424, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:32.298630', 'step': 9424, 'epoch': 2} {'type': 'loss', 'content': 0.0926518663764, 'timestamp': '2025-10-01 04:29:32.300813', 'step': 9425, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:32.362633', 'step': 9425, 'epoch': 2} {'type': 'loss', 'content': 0.09935110807418823, 'timestamp': '2025-10-01 04:29:32.365232', 'step': 9426, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:32.418399', 'step': 9426, 'epoch': 2} {'type': 'loss', 'content': 0.2664240300655365, 'timestamp': '2025-10-01 04:29:32.420775', 'step': 9427, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:32.474185', 'step': 9427, 'epoch': 2} {'type': 'loss', 'content': 0.12074748426675797, 'timestamp': '2025-10-01 04:29:32.481922', 'step': 9428, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:29:32.534874', 'step': 9428, 'epoch': 2} {'type': 'loss', 'content': 0.10014549642801285, 'timestamp': '2025-10-01 04:29:32.537153', 'step': 9429, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:32.594176', 'step': 9429, 'epoch': 2} {'type': 'loss', 'content': 0.12571600079536438, 'timestamp': '2025-10-01 04:29:32.596277', 'step': 9430, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:32.649941', 'step': 9430, 'epoch': 2} {'type': 'loss', 'content': 0.18806183338165283, 'timestamp': '2025-10-01 04:29:32.652120', 'step': 9431, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:32.706172', 'step': 9431, 'epoch': 2} {'type': 'loss', 'content': 0.09530962258577347, 'timestamp': '2025-10-01 04:29:32.711838', 'step': 9432, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:32.765742', 'step': 9432, 'epoch': 2} {'type': 'loss', 'content': 0.08875945955514908, 'timestamp': '2025-10-01 04:29:32.769008', 'step': 9433, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:32.827087', 'step': 9433, 'epoch': 2} {'type': 'loss', 'content': 0.11822947859764099, 'timestamp': '2025-10-01 04:29:32.829164', 'step': 9434, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:32.882543', 'step': 9434, 'epoch': 2} {'type': 'loss', 'content': 0.0848395973443985, 'timestamp': '2025-10-01 04:29:32.889972', 'step': 9435, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:29:32.943032', 'step': 9435, 'epoch': 2} {'type': 'loss', 'content': 0.11103897541761398, 'timestamp': '2025-10-01 04:29:32.949724', 'step': 9436, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:33.003835', 'step': 9436, 'epoch': 2} {'type': 'loss', 'content': 0.11867664754390717, 'timestamp': '2025-10-01 04:29:33.006135', 'step': 9437, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:33.060718', 'step': 9437, 'epoch': 2} {'type': 'loss', 'content': 0.12974132597446442, 'timestamp': '2025-10-01 04:29:33.064251', 'step': 9438, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:33.118871', 'step': 9438, 'epoch': 2} {'type': 'loss', 'content': 0.1822969764471054, 'timestamp': '2025-10-01 04:29:33.121078', 'step': 9439, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:33.174336', 'step': 9439, 'epoch': 2} {'type': 'loss', 'content': 0.11878836899995804, 'timestamp': '2025-10-01 04:29:33.180215', 'step': 9440, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:33.234266', 'step': 9440, 'epoch': 2} {'type': 'loss', 'content': 0.10957178473472595, 'timestamp': '2025-10-01 04:29:33.236515', 'step': 9441, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:33.289904', 'step': 9441, 'epoch': 2} {'type': 'loss', 'content': 0.24025148153305054, 'timestamp': '2025-10-01 04:29:33.303774', 'step': 9442, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:33.359604', 'step': 9442, 'epoch': 2} {'type': 'loss', 'content': 0.1313365250825882, 'timestamp': '2025-10-01 04:29:33.362049', 'step': 9443, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:33.415818', 'step': 9443, 'epoch': 2} {'type': 'loss', 'content': 0.1783132255077362, 'timestamp': '2025-10-01 04:29:33.421620', 'step': 9444, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:33.490899', 'step': 9444, 'epoch': 2} {'type': 'loss', 'content': 0.09158755838871002, 'timestamp': '2025-10-01 04:29:33.493520', 'step': 9445, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:33.546563', 'step': 9445, 'epoch': 2} {'type': 'loss', 'content': 0.10152070969343185, 'timestamp': '2025-10-01 04:29:33.548818', 'step': 9446, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:33.602157', 'step': 9446, 'epoch': 2} {'type': 'loss', 'content': 0.17203176021575928, 'timestamp': '2025-10-01 04:29:33.604421', 'step': 9447, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:33.658348', 'step': 9447, 'epoch': 2} {'type': 'loss', 'content': 0.1487981230020523, 'timestamp': '2025-10-01 04:29:33.664100', 'step': 9448, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:33.717806', 'step': 9448, 'epoch': 2} {'type': 'loss', 'content': 0.1978764683008194, 'timestamp': '2025-10-01 04:29:33.719923', 'step': 9449, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:33.773497', 'step': 9449, 'epoch': 2} {'type': 'loss', 'content': 0.09184923022985458, 'timestamp': '2025-10-01 04:29:33.775897', 'step': 9450, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:29:33.830603', 'step': 9450, 'epoch': 2} {'type': 'loss', 'content': 0.15441301465034485, 'timestamp': '2025-10-01 04:29:33.833150', 'step': 9451, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:33.888308', 'step': 9451, 'epoch': 2} {'type': 'loss', 'content': 0.09095635265111923, 'timestamp': '2025-10-01 04:29:33.897005', 'step': 9452, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:33.949770', 'step': 9452, 'epoch': 2} {'type': 'loss', 'content': 0.08988586813211441, 'timestamp': '2025-10-01 04:29:33.952312', 'step': 9453, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:34.005644', 'step': 9453, 'epoch': 2} {'type': 'loss', 'content': 0.14970433712005615, 'timestamp': '2025-10-01 04:29:34.008570', 'step': 9454, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:34.063761', 'step': 9454, 'epoch': 2} {'type': 'loss', 'content': 0.1809232383966446, 'timestamp': '2025-10-01 04:29:34.066098', 'step': 9455, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:34.121156', 'step': 9455, 'epoch': 2} {'type': 'loss', 'content': 0.16807708144187927, 'timestamp': '2025-10-01 04:29:34.127442', 'step': 9456, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:29:34.180001', 'step': 9456, 'epoch': 2} {'type': 'loss', 'content': 0.1815553903579712, 'timestamp': '2025-10-01 04:29:34.182353', 'step': 9457, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:34.235767', 'step': 9457, 'epoch': 2} {'type': 'loss', 'content': 0.1259644329547882, 'timestamp': '2025-10-01 04:29:34.238003', 'step': 9458, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:34.291438', 'step': 9458, 'epoch': 2} {'type': 'loss', 'content': 0.1274326592683792, 'timestamp': '2025-10-01 04:29:34.293726', 'step': 9459, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:34.354567', 'step': 9459, 'epoch': 2} {'type': 'loss', 'content': 0.1014258936047554, 'timestamp': '2025-10-01 04:29:34.361202', 'step': 9460, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:34.426351', 'step': 9460, 'epoch': 2} {'type': 'loss', 'content': 0.06196453794836998, 'timestamp': '2025-10-01 04:29:34.428855', 'step': 9461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:34.483163', 'step': 9461, 'epoch': 2} {'type': 'loss', 'content': 0.1672108918428421, 'timestamp': '2025-10-01 04:29:34.485522', 'step': 9462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:34.540916', 'step': 9462, 'epoch': 2} {'type': 'loss', 'content': 0.15674863755702972, 'timestamp': '2025-10-01 04:29:34.543271', 'step': 9463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:29:34.598426', 'step': 9463, 'epoch': 2} {'type': 'loss', 'content': 0.2145053893327713, 'timestamp': '2025-10-01 04:29:34.604539', 'step': 9464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:34.658729', 'step': 9464, 'epoch': 2} {'type': 'loss', 'content': 0.09261883050203323, 'timestamp': '2025-10-01 04:29:34.661380', 'step': 9465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:34.728557', 'step': 9465, 'epoch': 2} {'type': 'loss', 'content': 0.08817044645547867, 'timestamp': '2025-10-01 04:29:34.731582', 'step': 9466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:29:34.786607', 'step': 9466, 'epoch': 2} {'type': 'loss', 'content': 0.16743071377277374, 'timestamp': '2025-10-01 04:29:34.788904', 'step': 9467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:34.843575', 'step': 9467, 'epoch': 2} {'type': 'loss', 'content': 0.19165082275867462, 'timestamp': '2025-10-01 04:29:34.849937', 'step': 9468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:34.905075', 'step': 9468, 'epoch': 2} {'type': 'loss', 'content': 0.11987753957509995, 'timestamp': '2025-10-01 04:29:34.907377', 'step': 9469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:34.961803', 'step': 9469, 'epoch': 2} {'type': 'loss', 'content': 0.17922191321849823, 'timestamp': '2025-10-01 04:29:34.964822', 'step': 9470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:35.019587', 'step': 9470, 'epoch': 2} {'type': 'loss', 'content': 0.09018579125404358, 'timestamp': '2025-10-01 04:29:35.022298', 'step': 9471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:35.076718', 'step': 9471, 'epoch': 2} {'type': 'loss', 'content': 0.12608502805233002, 'timestamp': '2025-10-01 04:29:35.082738', 'step': 9472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:29:35.137033', 'step': 9472, 'epoch': 2} {'type': 'loss', 'content': 0.2197493463754654, 'timestamp': '2025-10-01 04:29:35.139181', 'step': 9473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:35.193181', 'step': 9473, 'epoch': 2} {'type': 'loss', 'content': 0.17266851663589478, 'timestamp': '2025-10-01 04:29:35.213550', 'step': 9474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:35.279091', 'step': 9474, 'epoch': 2} {'type': 'loss', 'content': 0.11833445727825165, 'timestamp': '2025-10-01 04:29:35.281293', 'step': 9475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:35.337230', 'step': 9475, 'epoch': 2} {'type': 'loss', 'content': 0.06236547604203224, 'timestamp': '2025-10-01 04:29:35.342862', 'step': 9476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:35.396878', 'step': 9476, 'epoch': 2} {'type': 'loss', 'content': 0.09172691404819489, 'timestamp': '2025-10-01 04:29:35.402124', 'step': 9477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:29:35.455892', 'step': 9477, 'epoch': 2} {'type': 'loss', 'content': 0.16714529693126678, 'timestamp': '2025-10-01 04:29:35.468173', 'step': 9478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:35.533579', 'step': 9478, 'epoch': 2} {'type': 'loss', 'content': 0.09766392409801483, 'timestamp': '2025-10-01 04:29:35.536385', 'step': 9479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:35.590390', 'step': 9479, 'epoch': 2} {'type': 'loss', 'content': 0.24028128385543823, 'timestamp': '2025-10-01 04:29:35.597283', 'step': 9480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:35.650817', 'step': 9480, 'epoch': 2} {'type': 'loss', 'content': 0.1182735413312912, 'timestamp': '2025-10-01 04:29:35.654104', 'step': 9481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:35.708921', 'step': 9481, 'epoch': 2} {'type': 'loss', 'content': 0.15095777809619904, 'timestamp': '2025-10-01 04:29:35.720613', 'step': 9482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:35.776467', 'step': 9482, 'epoch': 2} {'type': 'loss', 'content': 0.1907854825258255, 'timestamp': '2025-10-01 04:29:35.783273', 'step': 9483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:35.854697', 'step': 9483, 'epoch': 2} {'type': 'loss', 'content': 0.11351870745420456, 'timestamp': '2025-10-01 04:29:35.861644', 'step': 9484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:29:35.914999', 'step': 9484, 'epoch': 2} {'type': 'loss', 'content': 0.18628038465976715, 'timestamp': '2025-10-01 04:29:35.917126', 'step': 9485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:29:35.970373', 'step': 9485, 'epoch': 2} {'type': 'loss', 'content': 0.10637611150741577, 'timestamp': '2025-10-01 04:29:35.972496', 'step': 9486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:36.032837', 'step': 9486, 'epoch': 2} {'type': 'loss', 'content': 0.11574367433786392, 'timestamp': '2025-10-01 04:29:36.036949', 'step': 9487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:36.090657', 'step': 9487, 'epoch': 2} {'type': 'loss', 'content': 0.12429717183113098, 'timestamp': '2025-10-01 04:29:36.096368', 'step': 9488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:36.148649', 'step': 9488, 'epoch': 2} {'type': 'loss', 'content': 0.11674319207668304, 'timestamp': '2025-10-01 04:29:36.150714', 'step': 9489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:36.218199', 'step': 9489, 'epoch': 2} {'type': 'loss', 'content': 0.1136762797832489, 'timestamp': '2025-10-01 04:29:36.220529', 'step': 9490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:36.273522', 'step': 9490, 'epoch': 2} {'type': 'loss', 'content': 0.062066178768873215, 'timestamp': '2025-10-01 04:29:36.278573', 'step': 9491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:36.331252', 'step': 9491, 'epoch': 2} {'type': 'loss', 'content': 0.0930890142917633, 'timestamp': '2025-10-01 04:29:36.336939', 'step': 9492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:36.391294', 'step': 9492, 'epoch': 2} {'type': 'loss', 'content': 0.24566881358623505, 'timestamp': '2025-10-01 04:29:36.393397', 'step': 9493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:36.460584', 'step': 9493, 'epoch': 2} {'type': 'loss', 'content': 0.10906171798706055, 'timestamp': '2025-10-01 04:29:36.463921', 'step': 9494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:36.524807', 'step': 9494, 'epoch': 2} {'type': 'loss', 'content': 0.06351976841688156, 'timestamp': '2025-10-01 04:29:36.527778', 'step': 9495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:36.585787', 'step': 9495, 'epoch': 2} {'type': 'loss', 'content': 0.20039616525173187, 'timestamp': '2025-10-01 04:29:36.591479', 'step': 9496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:36.656548', 'step': 9496, 'epoch': 2} {'type': 'loss', 'content': 0.10712413489818573, 'timestamp': '2025-10-01 04:29:36.658664', 'step': 9497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:36.711941', 'step': 9497, 'epoch': 2} {'type': 'loss', 'content': 0.1870674043893814, 'timestamp': '2025-10-01 04:29:36.714110', 'step': 9498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:36.767350', 'step': 9498, 'epoch': 2} {'type': 'loss', 'content': 0.1712634414434433, 'timestamp': '2025-10-01 04:29:36.769548', 'step': 9499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:36.822823', 'step': 9499, 'epoch': 2} {'type': 'loss', 'content': 0.11563476920127869, 'timestamp': '2025-10-01 04:29:36.828600', 'step': 9500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 9500', 'timestamp': '2025-10-01 04:29:37.191530', 'step': 9500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:37.243664', 'step': 9500, 'epoch': 2} {'type': 'loss', 'content': 0.1692124307155609, 'timestamp': '2025-10-01 04:29:37.246016', 'step': 9501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:37.311041', 'step': 9501, 'epoch': 2} {'type': 'loss', 'content': 0.10877875983715057, 'timestamp': '2025-10-01 04:29:37.313049', 'step': 9502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:37.366012', 'step': 9502, 'epoch': 2} {'type': 'loss', 'content': 0.16735287010669708, 'timestamp': '2025-10-01 04:29:37.367989', 'step': 9503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:37.420800', 'step': 9503, 'epoch': 2} {'type': 'loss', 'content': 0.07359593361616135, 'timestamp': '2025-10-01 04:29:37.427491', 'step': 9504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:37.480385', 'step': 9504, 'epoch': 2} {'type': 'loss', 'content': 0.1523754745721817, 'timestamp': '2025-10-01 04:29:37.482638', 'step': 9505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:37.535877', 'step': 9505, 'epoch': 2} {'type': 'loss', 'content': 0.06147412210702896, 'timestamp': '2025-10-01 04:29:37.538030', 'step': 9506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:37.591520', 'step': 9506, 'epoch': 2} {'type': 'loss', 'content': 0.15075531601905823, 'timestamp': '2025-10-01 04:29:37.593777', 'step': 9507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:37.648941', 'step': 9507, 'epoch': 2} {'type': 'loss', 'content': 0.15564747154712677, 'timestamp': '2025-10-01 04:29:37.656385', 'step': 9508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:37.710531', 'step': 9508, 'epoch': 2} {'type': 'loss', 'content': 0.07431576400995255, 'timestamp': '2025-10-01 04:29:37.712812', 'step': 9509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:37.767276', 'step': 9509, 'epoch': 2} {'type': 'loss', 'content': 0.13156841695308685, 'timestamp': '2025-10-01 04:29:37.781275', 'step': 9510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:37.838907', 'step': 9510, 'epoch': 2} {'type': 'loss', 'content': 0.12479403614997864, 'timestamp': '2025-10-01 04:29:37.843836', 'step': 9511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:37.897569', 'step': 9511, 'epoch': 2} {'type': 'loss', 'content': 0.14147146046161652, 'timestamp': '2025-10-01 04:29:37.903455', 'step': 9512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:37.956757', 'step': 9512, 'epoch': 2} {'type': 'loss', 'content': 0.24686914682388306, 'timestamp': '2025-10-01 04:29:37.958953', 'step': 9513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:38.012900', 'step': 9513, 'epoch': 2} {'type': 'loss', 'content': 0.09824104607105255, 'timestamp': '2025-10-01 04:29:38.014995', 'step': 9514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:38.067381', 'step': 9514, 'epoch': 2} {'type': 'loss', 'content': 0.09426457434892654, 'timestamp': '2025-10-01 04:29:38.069649', 'step': 9515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:38.126905', 'step': 9515, 'epoch': 2} {'type': 'loss', 'content': 0.0789175033569336, 'timestamp': '2025-10-01 04:29:38.132610', 'step': 9516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:38.186474', 'step': 9516, 'epoch': 2} {'type': 'loss', 'content': 0.0983080267906189, 'timestamp': '2025-10-01 04:29:38.188645', 'step': 9517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:38.243442', 'step': 9517, 'epoch': 2} {'type': 'loss', 'content': 0.18364079296588898, 'timestamp': '2025-10-01 04:29:38.248395', 'step': 9518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:38.304728', 'step': 9518, 'epoch': 2} {'type': 'loss', 'content': 0.10780993103981018, 'timestamp': '2025-10-01 04:29:38.307342', 'step': 9519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:38.361118', 'step': 9519, 'epoch': 2} {'type': 'loss', 'content': 0.10879907757043839, 'timestamp': '2025-10-01 04:29:38.366787', 'step': 9520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:38.419312', 'step': 9520, 'epoch': 2} {'type': 'loss', 'content': 0.10932902991771698, 'timestamp': '2025-10-01 04:29:38.421268', 'step': 9521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:38.483424', 'step': 9521, 'epoch': 2} {'type': 'loss', 'content': 0.09182056039571762, 'timestamp': '2025-10-01 04:29:38.485671', 'step': 9522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:38.539130', 'step': 9522, 'epoch': 2} {'type': 'loss', 'content': 0.1141442283987999, 'timestamp': '2025-10-01 04:29:38.546061', 'step': 9523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:38.603528', 'step': 9523, 'epoch': 2} {'type': 'loss', 'content': 0.17461587488651276, 'timestamp': '2025-10-01 04:29:38.609092', 'step': 9524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:29:38.661704', 'step': 9524, 'epoch': 2} {'type': 'loss', 'content': 0.14283141493797302, 'timestamp': '2025-10-01 04:29:38.663678', 'step': 9525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:38.716255', 'step': 9525, 'epoch': 2} {'type': 'loss', 'content': 0.10077747702598572, 'timestamp': '2025-10-01 04:29:38.718365', 'step': 9526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:38.781749', 'step': 9526, 'epoch': 2} {'type': 'loss', 'content': 0.16759951412677765, 'timestamp': '2025-10-01 04:29:38.783776', 'step': 9527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:38.839409', 'step': 9527, 'epoch': 2} {'type': 'loss', 'content': 0.12065795809030533, 'timestamp': '2025-10-01 04:29:38.845166', 'step': 9528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:38.898808', 'step': 9528, 'epoch': 2} {'type': 'loss', 'content': 0.15805816650390625, 'timestamp': '2025-10-01 04:29:38.900634', 'step': 9529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:38.956380', 'step': 9529, 'epoch': 2} {'type': 'loss', 'content': 0.11196824908256531, 'timestamp': '2025-10-01 04:29:38.958565', 'step': 9530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:39.037676', 'step': 9530, 'epoch': 2} {'type': 'loss', 'content': 0.18634694814682007, 'timestamp': '2025-10-01 04:29:39.039500', 'step': 9531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:39.093305', 'step': 9531, 'epoch': 2} {'type': 'loss', 'content': 0.17361383140087128, 'timestamp': '2025-10-01 04:29:39.098916', 'step': 9532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:39.156550', 'step': 9532, 'epoch': 2} {'type': 'loss', 'content': 0.1352919489145279, 'timestamp': '2025-10-01 04:29:39.158655', 'step': 9533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:39.213014', 'step': 9533, 'epoch': 2} {'type': 'loss', 'content': 0.08965259790420532, 'timestamp': '2025-10-01 04:29:39.232839', 'step': 9534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:39.296851', 'step': 9534, 'epoch': 2} {'type': 'loss', 'content': 0.09465127438306808, 'timestamp': '2025-10-01 04:29:39.304485', 'step': 9535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:39.372635', 'step': 9535, 'epoch': 2} {'type': 'loss', 'content': 0.09670303016901016, 'timestamp': '2025-10-01 04:29:39.381287', 'step': 9536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:39.453797', 'step': 9536, 'epoch': 2} {'type': 'loss', 'content': 0.20521238446235657, 'timestamp': '2025-10-01 04:29:39.461417', 'step': 9537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:39.524760', 'step': 9537, 'epoch': 2} {'type': 'loss', 'content': 0.1925210803747177, 'timestamp': '2025-10-01 04:29:39.538241', 'step': 9538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:39.611822', 'step': 9538, 'epoch': 2} {'type': 'loss', 'content': 0.1131030023097992, 'timestamp': '2025-10-01 04:29:39.614866', 'step': 9539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:39.683916', 'step': 9539, 'epoch': 2} {'type': 'loss', 'content': 0.12738285958766937, 'timestamp': '2025-10-01 04:29:39.689957', 'step': 9540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:39.746137', 'step': 9540, 'epoch': 2} {'type': 'loss', 'content': 0.13002628087997437, 'timestamp': '2025-10-01 04:29:39.767886', 'step': 9541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:39.843637', 'step': 9541, 'epoch': 2} {'type': 'loss', 'content': 0.11235867440700531, 'timestamp': '2025-10-01 04:29:39.849660', 'step': 9542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:39.913194', 'step': 9542, 'epoch': 2} {'type': 'loss', 'content': 0.1471787542104721, 'timestamp': '2025-10-01 04:29:39.916018', 'step': 9543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:39.983401', 'step': 9543, 'epoch': 2} {'type': 'loss', 'content': 0.08171617239713669, 'timestamp': '2025-10-01 04:29:40.011851', 'step': 9544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:40.076490', 'step': 9544, 'epoch': 2} {'type': 'loss', 'content': 0.08890584111213684, 'timestamp': '2025-10-01 04:29:40.088881', 'step': 9545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:40.159322', 'step': 9545, 'epoch': 2} {'type': 'loss', 'content': 0.0931442379951477, 'timestamp': '2025-10-01 04:29:40.166770', 'step': 9546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:40.225133', 'step': 9546, 'epoch': 2} {'type': 'loss', 'content': 0.1652143895626068, 'timestamp': '2025-10-01 04:29:40.233662', 'step': 9547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:40.299914', 'step': 9547, 'epoch': 2} {'type': 'loss', 'content': 0.07918715476989746, 'timestamp': '2025-10-01 04:29:40.310313', 'step': 9548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:40.382743', 'step': 9548, 'epoch': 2} {'type': 'loss', 'content': 0.13798211514949799, 'timestamp': '2025-10-01 04:29:40.385078', 'step': 9549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:40.457410', 'step': 9549, 'epoch': 2} {'type': 'loss', 'content': 0.06506817787885666, 'timestamp': '2025-10-01 04:29:40.459551', 'step': 9550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:40.513127', 'step': 9550, 'epoch': 2} {'type': 'loss', 'content': 0.15652917325496674, 'timestamp': '2025-10-01 04:29:40.515345', 'step': 9551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:40.569075', 'step': 9551, 'epoch': 2} {'type': 'loss', 'content': 0.168013334274292, 'timestamp': '2025-10-01 04:29:40.575062', 'step': 9552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:40.627735', 'step': 9552, 'epoch': 2} {'type': 'loss', 'content': 0.1990508735179901, 'timestamp': '2025-10-01 04:29:40.629904', 'step': 9553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:40.683205', 'step': 9553, 'epoch': 2} {'type': 'loss', 'content': 0.17070770263671875, 'timestamp': '2025-10-01 04:29:40.685223', 'step': 9554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:40.738313', 'step': 9554, 'epoch': 2} {'type': 'loss', 'content': 0.08194184303283691, 'timestamp': '2025-10-01 04:29:40.740435', 'step': 9555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:40.797941', 'step': 9555, 'epoch': 2} {'type': 'loss', 'content': 0.08755377680063248, 'timestamp': '2025-10-01 04:29:40.803459', 'step': 9556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:40.856843', 'step': 9556, 'epoch': 2} {'type': 'loss', 'content': 0.14765912294387817, 'timestamp': '2025-10-01 04:29:40.858782', 'step': 9557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:40.911332', 'step': 9557, 'epoch': 2} {'type': 'loss', 'content': 0.18789437413215637, 'timestamp': '2025-10-01 04:29:40.913211', 'step': 9558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:40.966088', 'step': 9558, 'epoch': 2} {'type': 'loss', 'content': 0.13332119584083557, 'timestamp': '2025-10-01 04:29:40.968267', 'step': 9559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:41.033933', 'step': 9559, 'epoch': 2} {'type': 'loss', 'content': 0.11496761441230774, 'timestamp': '2025-10-01 04:29:41.041216', 'step': 9560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:41.097758', 'step': 9560, 'epoch': 2} {'type': 'loss', 'content': 0.1398439109325409, 'timestamp': '2025-10-01 04:29:41.099819', 'step': 9561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:41.153044', 'step': 9561, 'epoch': 2} {'type': 'loss', 'content': 0.16610243916511536, 'timestamp': '2025-10-01 04:29:41.155170', 'step': 9562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:41.208633', 'step': 9562, 'epoch': 2} {'type': 'loss', 'content': 0.17095723748207092, 'timestamp': '2025-10-01 04:29:41.210717', 'step': 9563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:41.263705', 'step': 9563, 'epoch': 2} {'type': 'loss', 'content': 0.13211028277873993, 'timestamp': '2025-10-01 04:29:41.269337', 'step': 9564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:41.321746', 'step': 9564, 'epoch': 2} {'type': 'loss', 'content': 0.1631101816892624, 'timestamp': '2025-10-01 04:29:41.323932', 'step': 9565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:41.376797', 'step': 9565, 'epoch': 2} {'type': 'loss', 'content': 0.17140693962574005, 'timestamp': '2025-10-01 04:29:41.379063', 'step': 9566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:41.432404', 'step': 9566, 'epoch': 2} {'type': 'loss', 'content': 0.1381511390209198, 'timestamp': '2025-10-01 04:29:41.434573', 'step': 9567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:29:41.487318', 'step': 9567, 'epoch': 2} {'type': 'loss', 'content': 0.13794516026973724, 'timestamp': '2025-10-01 04:29:41.493047', 'step': 9568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:41.546893', 'step': 9568, 'epoch': 2} {'type': 'loss', 'content': 0.12557819485664368, 'timestamp': '2025-10-01 04:29:41.549085', 'step': 9569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:41.601785', 'step': 9569, 'epoch': 2} {'type': 'loss', 'content': 0.06343361735343933, 'timestamp': '2025-10-01 04:29:41.603902', 'step': 9570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:41.656564', 'step': 9570, 'epoch': 2} {'type': 'loss', 'content': 0.1960962563753128, 'timestamp': '2025-10-01 04:29:41.658493', 'step': 9571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:41.712816', 'step': 9571, 'epoch': 2} {'type': 'loss', 'content': 0.12983334064483643, 'timestamp': '2025-10-01 04:29:41.718545', 'step': 9572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:41.770685', 'step': 9572, 'epoch': 2} {'type': 'loss', 'content': 0.10434074699878693, 'timestamp': '2025-10-01 04:29:41.772782', 'step': 9573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:41.831603', 'step': 9573, 'epoch': 2} {'type': 'loss', 'content': 0.12500964105129242, 'timestamp': '2025-10-01 04:29:41.833772', 'step': 9574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:29:41.886751', 'step': 9574, 'epoch': 2} {'type': 'loss', 'content': 0.20731595158576965, 'timestamp': '2025-10-01 04:29:41.888891', 'step': 9575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:41.942148', 'step': 9575, 'epoch': 2} {'type': 'loss', 'content': 0.1626041829586029, 'timestamp': '2025-10-01 04:29:41.947885', 'step': 9576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:42.000651', 'step': 9576, 'epoch': 2} {'type': 'loss', 'content': 0.14493994414806366, 'timestamp': '2025-10-01 04:29:42.002880', 'step': 9577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:42.065655', 'step': 9577, 'epoch': 2} {'type': 'loss', 'content': 0.10952319204807281, 'timestamp': '2025-10-01 04:29:42.068132', 'step': 9578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:42.121377', 'step': 9578, 'epoch': 2} {'type': 'loss', 'content': 0.10679108649492264, 'timestamp': '2025-10-01 04:29:42.123461', 'step': 9579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:42.176618', 'step': 9579, 'epoch': 2} {'type': 'loss', 'content': 0.10088051855564117, 'timestamp': '2025-10-01 04:29:42.182376', 'step': 9580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:42.235067', 'step': 9580, 'epoch': 2} {'type': 'loss', 'content': 0.12396171689033508, 'timestamp': '2025-10-01 04:29:42.237302', 'step': 9581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:29:42.290005', 'step': 9581, 'epoch': 2} {'type': 'loss', 'content': 0.1416866034269333, 'timestamp': '2025-10-01 04:29:42.292198', 'step': 9582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:42.345161', 'step': 9582, 'epoch': 2} {'type': 'loss', 'content': 0.1177763119339943, 'timestamp': '2025-10-01 04:29:42.347230', 'step': 9583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:42.400186', 'step': 9583, 'epoch': 2} {'type': 'loss', 'content': 0.09656364470720291, 'timestamp': '2025-10-01 04:29:42.405924', 'step': 9584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:42.458584', 'step': 9584, 'epoch': 2} {'type': 'loss', 'content': 0.135516956448555, 'timestamp': '2025-10-01 04:29:42.460851', 'step': 9585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:42.515760', 'step': 9585, 'epoch': 2} {'type': 'loss', 'content': 0.14561061561107635, 'timestamp': '2025-10-01 04:29:42.517785', 'step': 9586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:42.571724', 'step': 9586, 'epoch': 2} {'type': 'loss', 'content': 0.09463071078062057, 'timestamp': '2025-10-01 04:29:42.574782', 'step': 9587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:42.628990', 'step': 9587, 'epoch': 2} {'type': 'loss', 'content': 0.24271392822265625, 'timestamp': '2025-10-01 04:29:42.634722', 'step': 9588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:42.688831', 'step': 9588, 'epoch': 2} {'type': 'loss', 'content': 0.1378317028284073, 'timestamp': '2025-10-01 04:29:42.691054', 'step': 9589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:42.746468', 'step': 9589, 'epoch': 2} {'type': 'loss', 'content': 0.08526600897312164, 'timestamp': '2025-10-01 04:29:42.748794', 'step': 9590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:42.803215', 'step': 9590, 'epoch': 2} {'type': 'loss', 'content': 0.1148703470826149, 'timestamp': '2025-10-01 04:29:42.806395', 'step': 9591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:42.862928', 'step': 9591, 'epoch': 2} {'type': 'loss', 'content': 0.1858290135860443, 'timestamp': '2025-10-01 04:29:42.876792', 'step': 9592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:42.930114', 'step': 9592, 'epoch': 2} {'type': 'loss', 'content': 0.07611582428216934, 'timestamp': '2025-10-01 04:29:42.932538', 'step': 9593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:42.985711', 'step': 9593, 'epoch': 2} {'type': 'loss', 'content': 0.06881916522979736, 'timestamp': '2025-10-01 04:29:42.988479', 'step': 9594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:43.042923', 'step': 9594, 'epoch': 2} {'type': 'loss', 'content': 0.2848237454891205, 'timestamp': '2025-10-01 04:29:43.046203', 'step': 9595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:43.110382', 'step': 9595, 'epoch': 2} {'type': 'loss', 'content': 0.06133970990777016, 'timestamp': '2025-10-01 04:29:43.116094', 'step': 9596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:43.170161', 'step': 9596, 'epoch': 2} {'type': 'loss', 'content': 0.12846805155277252, 'timestamp': '2025-10-01 04:29:43.172661', 'step': 9597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:43.227051', 'step': 9597, 'epoch': 2} {'type': 'loss', 'content': 0.20920884609222412, 'timestamp': '2025-10-01 04:29:43.229429', 'step': 9598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:43.284610', 'step': 9598, 'epoch': 2} {'type': 'loss', 'content': 0.08283040672540665, 'timestamp': '2025-10-01 04:29:43.286896', 'step': 9599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:43.341013', 'step': 9599, 'epoch': 2} {'type': 'loss', 'content': 0.09748917073011398, 'timestamp': '2025-10-01 04:29:43.346897', 'step': 9600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:43.400478', 'step': 9600, 'epoch': 2} {'type': 'loss', 'content': 0.12066001445055008, 'timestamp': '2025-10-01 04:29:43.402588', 'step': 9601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:43.456740', 'step': 9601, 'epoch': 2} {'type': 'loss', 'content': 0.10397392511367798, 'timestamp': '2025-10-01 04:29:43.458847', 'step': 9602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:43.513245', 'step': 9602, 'epoch': 2} {'type': 'loss', 'content': 0.12886007130146027, 'timestamp': '2025-10-01 04:29:43.515213', 'step': 9603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:43.585016', 'step': 9603, 'epoch': 2} {'type': 'loss', 'content': 0.12789322435855865, 'timestamp': '2025-10-01 04:29:43.590982', 'step': 9604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:43.653855', 'step': 9604, 'epoch': 2} {'type': 'loss', 'content': 0.13605643808841705, 'timestamp': '2025-10-01 04:29:43.656368', 'step': 9605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:43.710422', 'step': 9605, 'epoch': 2} {'type': 'loss', 'content': 0.11466412991285324, 'timestamp': '2025-10-01 04:29:43.712661', 'step': 9606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:43.766826', 'step': 9606, 'epoch': 2} {'type': 'loss', 'content': 0.11712019145488739, 'timestamp': '2025-10-01 04:29:43.769251', 'step': 9607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:43.823191', 'step': 9607, 'epoch': 2} {'type': 'loss', 'content': 0.10232219845056534, 'timestamp': '2025-10-01 04:29:43.828936', 'step': 9608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:43.895962', 'step': 9608, 'epoch': 2} {'type': 'loss', 'content': 0.09452187269926071, 'timestamp': '2025-10-01 04:29:43.898179', 'step': 9609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:43.951141', 'step': 9609, 'epoch': 2} {'type': 'loss', 'content': 0.09588073194026947, 'timestamp': '2025-10-01 04:29:43.953296', 'step': 9610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:44.007371', 'step': 9610, 'epoch': 2} {'type': 'loss', 'content': 0.16664941608905792, 'timestamp': '2025-10-01 04:29:44.009608', 'step': 9611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:44.063642', 'step': 9611, 'epoch': 2} {'type': 'loss', 'content': 0.11867984384298325, 'timestamp': '2025-10-01 04:29:44.069978', 'step': 9612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:29:44.123606', 'step': 9612, 'epoch': 2} {'type': 'loss', 'content': 0.08849292993545532, 'timestamp': '2025-10-01 04:29:44.125800', 'step': 9613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:44.179219', 'step': 9613, 'epoch': 2} {'type': 'loss', 'content': 0.09002881497144699, 'timestamp': '2025-10-01 04:29:44.181455', 'step': 9614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:44.236078', 'step': 9614, 'epoch': 2} {'type': 'loss', 'content': 0.1140189841389656, 'timestamp': '2025-10-01 04:29:44.238196', 'step': 9615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:44.292864', 'step': 9615, 'epoch': 2} {'type': 'loss', 'content': 0.1495206654071808, 'timestamp': '2025-10-01 04:29:44.299299', 'step': 9616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:44.367030', 'step': 9616, 'epoch': 2} {'type': 'loss', 'content': 0.1452300250530243, 'timestamp': '2025-10-01 04:29:44.369325', 'step': 9617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:44.424935', 'step': 9617, 'epoch': 2} {'type': 'loss', 'content': 0.046225305646657944, 'timestamp': '2025-10-01 04:29:44.426861', 'step': 9618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:44.480526', 'step': 9618, 'epoch': 2} {'type': 'loss', 'content': 0.17902003228664398, 'timestamp': '2025-10-01 04:29:44.482336', 'step': 9619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:44.536213', 'step': 9619, 'epoch': 2} {'type': 'loss', 'content': 0.12500986456871033, 'timestamp': '2025-10-01 04:29:44.542121', 'step': 9620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:44.600967', 'step': 9620, 'epoch': 2} {'type': 'loss', 'content': 0.11135132610797882, 'timestamp': '2025-10-01 04:29:44.604122', 'step': 9621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:44.657452', 'step': 9621, 'epoch': 2} {'type': 'loss', 'content': 0.19519957900047302, 'timestamp': '2025-10-01 04:29:44.659547', 'step': 9622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:44.713552', 'step': 9622, 'epoch': 2} {'type': 'loss', 'content': 0.1676158607006073, 'timestamp': '2025-10-01 04:29:44.715834', 'step': 9623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:44.768746', 'step': 9623, 'epoch': 2} {'type': 'loss', 'content': 0.15896213054656982, 'timestamp': '2025-10-01 04:29:44.774751', 'step': 9624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:44.827592', 'step': 9624, 'epoch': 2} {'type': 'loss', 'content': 0.13297952711582184, 'timestamp': '2025-10-01 04:29:44.829977', 'step': 9625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:44.883139', 'step': 9625, 'epoch': 2} {'type': 'loss', 'content': 0.09880323708057404, 'timestamp': '2025-10-01 04:29:44.885269', 'step': 9626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:44.938502', 'step': 9626, 'epoch': 2} {'type': 'loss', 'content': 0.12836997210979462, 'timestamp': '2025-10-01 04:29:44.940831', 'step': 9627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:44.995310', 'step': 9627, 'epoch': 2} {'type': 'loss', 'content': 0.24048908054828644, 'timestamp': '2025-10-01 04:29:45.001306', 'step': 9628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:45.054119', 'step': 9628, 'epoch': 2} {'type': 'loss', 'content': 0.16825775802135468, 'timestamp': '2025-10-01 04:29:45.057177', 'step': 9629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:45.129883', 'step': 9629, 'epoch': 2} {'type': 'loss', 'content': 0.13625435531139374, 'timestamp': '2025-10-01 04:29:45.132945', 'step': 9630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:45.187704', 'step': 9630, 'epoch': 2} {'type': 'loss', 'content': 0.19102640450000763, 'timestamp': '2025-10-01 04:29:45.189790', 'step': 9631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:45.243497', 'step': 9631, 'epoch': 2} {'type': 'loss', 'content': 0.22042623162269592, 'timestamp': '2025-10-01 04:29:45.249732', 'step': 9632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:45.302643', 'step': 9632, 'epoch': 2} {'type': 'loss', 'content': 0.179973304271698, 'timestamp': '2025-10-01 04:29:45.304574', 'step': 9633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:45.358548', 'step': 9633, 'epoch': 2} {'type': 'loss', 'content': 0.14695559442043304, 'timestamp': '2025-10-01 04:29:45.360609', 'step': 9634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:45.413313', 'step': 9634, 'epoch': 2} {'type': 'loss', 'content': 0.21398265659809113, 'timestamp': '2025-10-01 04:29:45.415245', 'step': 9635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:29:45.479743', 'step': 9635, 'epoch': 2} {'type': 'loss', 'content': 0.13343088328838348, 'timestamp': '2025-10-01 04:29:45.485537', 'step': 9636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:45.538037', 'step': 9636, 'epoch': 2} {'type': 'loss', 'content': 0.10198428481817245, 'timestamp': '2025-10-01 04:29:45.540213', 'step': 9637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:45.604418', 'step': 9637, 'epoch': 2} {'type': 'loss', 'content': 0.08431822806596756, 'timestamp': '2025-10-01 04:29:45.607443', 'step': 9638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:45.662035', 'step': 9638, 'epoch': 2} {'type': 'loss', 'content': 0.11910905689001083, 'timestamp': '2025-10-01 04:29:45.671394', 'step': 9639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:45.727935', 'step': 9639, 'epoch': 2} {'type': 'loss', 'content': 0.15548060834407806, 'timestamp': '2025-10-01 04:29:45.734751', 'step': 9640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:45.789260', 'step': 9640, 'epoch': 2} {'type': 'loss', 'content': 0.11101742088794708, 'timestamp': '2025-10-01 04:29:45.791264', 'step': 9641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:45.859155', 'step': 9641, 'epoch': 2} {'type': 'loss', 'content': 0.26855117082595825, 'timestamp': '2025-10-01 04:29:45.861336', 'step': 9642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:45.916076', 'step': 9642, 'epoch': 2} {'type': 'loss', 'content': 0.10901810973882675, 'timestamp': '2025-10-01 04:29:45.920184', 'step': 9643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:45.975736', 'step': 9643, 'epoch': 2} {'type': 'loss', 'content': 0.1513010859489441, 'timestamp': '2025-10-01 04:29:45.982036', 'step': 9644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:46.036237', 'step': 9644, 'epoch': 2} {'type': 'loss', 'content': 0.19069266319274902, 'timestamp': '2025-10-01 04:29:46.038457', 'step': 9645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:46.093663', 'step': 9645, 'epoch': 2} {'type': 'loss', 'content': 0.1631140112876892, 'timestamp': '2025-10-01 04:29:46.095821', 'step': 9646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:46.150500', 'step': 9646, 'epoch': 2} {'type': 'loss', 'content': 0.1354483664035797, 'timestamp': '2025-10-01 04:29:46.154437', 'step': 9647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:46.210469', 'step': 9647, 'epoch': 2} {'type': 'loss', 'content': 0.25190407037734985, 'timestamp': '2025-10-01 04:29:46.216533', 'step': 9648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:46.271652', 'step': 9648, 'epoch': 2} {'type': 'loss', 'content': 0.0987805649638176, 'timestamp': '2025-10-01 04:29:46.273900', 'step': 9649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:46.328128', 'step': 9649, 'epoch': 2} {'type': 'loss', 'content': 0.1257396936416626, 'timestamp': '2025-10-01 04:29:46.330322', 'step': 9650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:46.387546', 'step': 9650, 'epoch': 2} {'type': 'loss', 'content': 0.07662831246852875, 'timestamp': '2025-10-01 04:29:46.389774', 'step': 9651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:46.444142', 'step': 9651, 'epoch': 2} {'type': 'loss', 'content': 0.11546691507101059, 'timestamp': '2025-10-01 04:29:46.450445', 'step': 9652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:46.503256', 'step': 9652, 'epoch': 2} {'type': 'loss', 'content': 0.11739547550678253, 'timestamp': '2025-10-01 04:29:46.505794', 'step': 9653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:46.559265', 'step': 9653, 'epoch': 2} {'type': 'loss', 'content': 0.14006569981575012, 'timestamp': '2025-10-01 04:29:46.562323', 'step': 9654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:46.617037', 'step': 9654, 'epoch': 2} {'type': 'loss', 'content': 0.15134800970554352, 'timestamp': '2025-10-01 04:29:46.618989', 'step': 9655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:46.671728', 'step': 9655, 'epoch': 2} {'type': 'loss', 'content': 0.14462919533252716, 'timestamp': '2025-10-01 04:29:46.677709', 'step': 9656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:46.730506', 'step': 9656, 'epoch': 2} {'type': 'loss', 'content': 0.11254342645406723, 'timestamp': '2025-10-01 04:29:46.732590', 'step': 9657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:46.785114', 'step': 9657, 'epoch': 2} {'type': 'loss', 'content': 0.04858427122235298, 'timestamp': '2025-10-01 04:29:46.787455', 'step': 9658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:46.840747', 'step': 9658, 'epoch': 2} {'type': 'loss', 'content': 0.12956498563289642, 'timestamp': '2025-10-01 04:29:46.842775', 'step': 9659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:46.908168', 'step': 9659, 'epoch': 2} {'type': 'loss', 'content': 0.1577581763267517, 'timestamp': '2025-10-01 04:29:46.913856', 'step': 9660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:46.967166', 'step': 9660, 'epoch': 2} {'type': 'loss', 'content': 0.08036047965288162, 'timestamp': '2025-10-01 04:29:46.969339', 'step': 9661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:29:47.023986', 'step': 9661, 'epoch': 2} {'type': 'loss', 'content': 0.1346723437309265, 'timestamp': '2025-10-01 04:29:47.041369', 'step': 9662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:47.094744', 'step': 9662, 'epoch': 2} {'type': 'loss', 'content': 0.08774746209383011, 'timestamp': '2025-10-01 04:29:47.096827', 'step': 9663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:47.151816', 'step': 9663, 'epoch': 2} {'type': 'loss', 'content': 0.07674122601747513, 'timestamp': '2025-10-01 04:29:47.157787', 'step': 9664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:47.227005', 'step': 9664, 'epoch': 2} {'type': 'loss', 'content': 0.09782641381025314, 'timestamp': '2025-10-01 04:29:47.229219', 'step': 9665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:47.282366', 'step': 9665, 'epoch': 2} {'type': 'loss', 'content': 0.20101916790008545, 'timestamp': '2025-10-01 04:29:47.284451', 'step': 9666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:47.348499', 'step': 9666, 'epoch': 2} {'type': 'loss', 'content': 0.1404668688774109, 'timestamp': '2025-10-01 04:29:47.351017', 'step': 9667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:29:47.405994', 'step': 9667, 'epoch': 2} {'type': 'loss', 'content': 0.14848941564559937, 'timestamp': '2025-10-01 04:29:47.411917', 'step': 9668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:47.464468', 'step': 9668, 'epoch': 2} {'type': 'loss', 'content': 0.09424854069948196, 'timestamp': '2025-10-01 04:29:47.466800', 'step': 9669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:47.519453', 'step': 9669, 'epoch': 2} {'type': 'loss', 'content': 0.17820565402507782, 'timestamp': '2025-10-01 04:29:47.521604', 'step': 9670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:47.574405', 'step': 9670, 'epoch': 2} {'type': 'loss', 'content': 0.26724645495414734, 'timestamp': '2025-10-01 04:29:47.577019', 'step': 9671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:47.629799', 'step': 9671, 'epoch': 2} {'type': 'loss', 'content': 0.2994353175163269, 'timestamp': '2025-10-01 04:29:47.635504', 'step': 9672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:47.687530', 'step': 9672, 'epoch': 2} {'type': 'loss', 'content': 0.05850953608751297, 'timestamp': '2025-10-01 04:29:47.689769', 'step': 9673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:47.746549', 'step': 9673, 'epoch': 2} {'type': 'loss', 'content': 0.15216627717018127, 'timestamp': '2025-10-01 04:29:47.748597', 'step': 9674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:47.801824', 'step': 9674, 'epoch': 2} {'type': 'loss', 'content': 0.15004275739192963, 'timestamp': '2025-10-01 04:29:47.803898', 'step': 9675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:47.857331', 'step': 9675, 'epoch': 2} {'type': 'loss', 'content': 0.1695886254310608, 'timestamp': '2025-10-01 04:29:47.863087', 'step': 9676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:47.915426', 'step': 9676, 'epoch': 2} {'type': 'loss', 'content': 0.052406735718250275, 'timestamp': '2025-10-01 04:29:47.917781', 'step': 9677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:47.970499', 'step': 9677, 'epoch': 2} {'type': 'loss', 'content': 0.23063978552818298, 'timestamp': '2025-10-01 04:29:47.972684', 'step': 9678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:48.026257', 'step': 9678, 'epoch': 2} {'type': 'loss', 'content': 0.11110924929380417, 'timestamp': '2025-10-01 04:29:48.028242', 'step': 9679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:29:48.081895', 'step': 9679, 'epoch': 2} {'type': 'loss', 'content': 0.1421709805727005, 'timestamp': '2025-10-01 04:29:48.087533', 'step': 9680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:48.141571', 'step': 9680, 'epoch': 2} {'type': 'loss', 'content': 0.10039312392473221, 'timestamp': '2025-10-01 04:29:48.143993', 'step': 9681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:48.196636', 'step': 9681, 'epoch': 2} {'type': 'loss', 'content': 0.09243059158325195, 'timestamp': '2025-10-01 04:29:48.200621', 'step': 9682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:48.253348', 'step': 9682, 'epoch': 2} {'type': 'loss', 'content': 0.1977231204509735, 'timestamp': '2025-10-01 04:29:48.255421', 'step': 9683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:48.310027', 'step': 9683, 'epoch': 2} {'type': 'loss', 'content': 0.1669405996799469, 'timestamp': '2025-10-01 04:29:48.329408', 'step': 9684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:48.382490', 'step': 9684, 'epoch': 2} {'type': 'loss', 'content': 0.14119680225849152, 'timestamp': '2025-10-01 04:29:48.386043', 'step': 9685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:48.439169', 'step': 9685, 'epoch': 2} {'type': 'loss', 'content': 0.16967450082302094, 'timestamp': '2025-10-01 04:29:48.441409', 'step': 9686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:48.494827', 'step': 9686, 'epoch': 2} {'type': 'loss', 'content': 0.1514694094657898, 'timestamp': '2025-10-01 04:29:48.498366', 'step': 9687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:48.556686', 'step': 9687, 'epoch': 2} {'type': 'loss', 'content': 0.11458699405193329, 'timestamp': '2025-10-01 04:29:48.562384', 'step': 9688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:29:48.615456', 'step': 9688, 'epoch': 2} {'type': 'loss', 'content': 0.12227229028940201, 'timestamp': '2025-10-01 04:29:48.617778', 'step': 9689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:48.681727', 'step': 9689, 'epoch': 2} {'type': 'loss', 'content': 0.13056567311286926, 'timestamp': '2025-10-01 04:29:48.684280', 'step': 9690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:48.737508', 'step': 9690, 'epoch': 2} {'type': 'loss', 'content': 0.08593499660491943, 'timestamp': '2025-10-01 04:29:48.739613', 'step': 9691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:48.796311', 'step': 9691, 'epoch': 2} {'type': 'loss', 'content': 0.1416943520307541, 'timestamp': '2025-10-01 04:29:48.803531', 'step': 9692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:48.856023', 'step': 9692, 'epoch': 2} {'type': 'loss', 'content': 0.07585278898477554, 'timestamp': '2025-10-01 04:29:48.861458', 'step': 9693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:48.914744', 'step': 9693, 'epoch': 2} {'type': 'loss', 'content': 0.1481359750032425, 'timestamp': '2025-10-01 04:29:48.917025', 'step': 9694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:48.979017', 'step': 9694, 'epoch': 2} {'type': 'loss', 'content': 0.14160312712192535, 'timestamp': '2025-10-01 04:29:48.983593', 'step': 9695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:49.059266', 'step': 9695, 'epoch': 2} {'type': 'loss', 'content': 0.13570427894592285, 'timestamp': '2025-10-01 04:29:49.065006', 'step': 9696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:49.128848', 'step': 9696, 'epoch': 2} {'type': 'loss', 'content': 0.18946506083011627, 'timestamp': '2025-10-01 04:29:49.131012', 'step': 9697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:49.186258', 'step': 9697, 'epoch': 2} {'type': 'loss', 'content': 0.1317054182291031, 'timestamp': '2025-10-01 04:29:49.188596', 'step': 9698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:49.242834', 'step': 9698, 'epoch': 2} {'type': 'loss', 'content': 0.10765232145786285, 'timestamp': '2025-10-01 04:29:49.245092', 'step': 9699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:49.301807', 'step': 9699, 'epoch': 2} {'type': 'loss', 'content': 0.08660470694303513, 'timestamp': '2025-10-01 04:29:49.307310', 'step': 9700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:49.363500', 'step': 9700, 'epoch': 2} {'type': 'loss', 'content': 0.18528975546360016, 'timestamp': '2025-10-01 04:29:49.365791', 'step': 9701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:49.426382', 'step': 9701, 'epoch': 2} {'type': 'loss', 'content': 0.1596934199333191, 'timestamp': '2025-10-01 04:29:49.428497', 'step': 9702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:29:49.481654', 'step': 9702, 'epoch': 2} {'type': 'loss', 'content': 0.1960059553384781, 'timestamp': '2025-10-01 04:29:49.485163', 'step': 9703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:29:49.543699', 'step': 9703, 'epoch': 2} {'type': 'loss', 'content': 0.055054325610399246, 'timestamp': '2025-10-01 04:29:49.549546', 'step': 9704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:49.616412', 'step': 9704, 'epoch': 2} {'type': 'loss', 'content': 0.19508902728557587, 'timestamp': '2025-10-01 04:29:49.618481', 'step': 9705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:29:49.673757', 'step': 9705, 'epoch': 2} {'type': 'loss', 'content': 0.1393113136291504, 'timestamp': '2025-10-01 04:29:49.679146', 'step': 9706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:49.739212', 'step': 9706, 'epoch': 2} {'type': 'loss', 'content': 0.1349826604127884, 'timestamp': '2025-10-01 04:29:49.749186', 'step': 9707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:49.825010', 'step': 9707, 'epoch': 2} {'type': 'loss', 'content': 0.08910682797431946, 'timestamp': '2025-10-01 04:29:49.830587', 'step': 9708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:49.883752', 'step': 9708, 'epoch': 2} {'type': 'loss', 'content': 0.0774787962436676, 'timestamp': '2025-10-01 04:29:49.896928', 'step': 9709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:49.959978', 'step': 9709, 'epoch': 2} {'type': 'loss', 'content': 0.150626078248024, 'timestamp': '2025-10-01 04:29:49.962358', 'step': 9710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:29:50.047646', 'step': 9710, 'epoch': 2} {'type': 'loss', 'content': 0.09678643941879272, 'timestamp': '2025-10-01 04:29:50.052267', 'step': 9711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:50.115181', 'step': 9711, 'epoch': 2} {'type': 'loss', 'content': 0.17495200037956238, 'timestamp': '2025-10-01 04:29:50.122489', 'step': 9712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:50.200470', 'step': 9712, 'epoch': 2} {'type': 'loss', 'content': 0.11656533181667328, 'timestamp': '2025-10-01 04:29:50.202782', 'step': 9713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:50.256449', 'step': 9713, 'epoch': 2} {'type': 'loss', 'content': 0.12470874935388565, 'timestamp': '2025-10-01 04:29:50.259304', 'step': 9714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:29:50.319753', 'step': 9714, 'epoch': 2} {'type': 'loss', 'content': 0.16734932363033295, 'timestamp': '2025-10-01 04:29:50.322193', 'step': 9715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:50.397315', 'step': 9715, 'epoch': 2} {'type': 'loss', 'content': 0.09996708482503891, 'timestamp': '2025-10-01 04:29:50.403401', 'step': 9716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:50.478914', 'step': 9716, 'epoch': 2} {'type': 'loss', 'content': 0.17330648005008698, 'timestamp': '2025-10-01 04:29:50.488503', 'step': 9717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:50.563649', 'step': 9717, 'epoch': 2} {'type': 'loss', 'content': 0.08822372555732727, 'timestamp': '2025-10-01 04:29:50.565906', 'step': 9718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:50.621617', 'step': 9718, 'epoch': 2} {'type': 'loss', 'content': 0.1833021491765976, 'timestamp': '2025-10-01 04:29:50.623697', 'step': 9719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:50.677119', 'step': 9719, 'epoch': 2} {'type': 'loss', 'content': 0.11378780752420425, 'timestamp': '2025-10-01 04:29:50.682741', 'step': 9720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:50.736003', 'step': 9720, 'epoch': 2} {'type': 'loss', 'content': 0.14288166165351868, 'timestamp': '2025-10-01 04:29:50.738814', 'step': 9721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:50.794099', 'step': 9721, 'epoch': 2} {'type': 'loss', 'content': 0.16861717402935028, 'timestamp': '2025-10-01 04:29:50.796998', 'step': 9722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:50.850796', 'step': 9722, 'epoch': 2} {'type': 'loss', 'content': 0.09938739985227585, 'timestamp': '2025-10-01 04:29:50.853725', 'step': 9723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:50.907322', 'step': 9723, 'epoch': 2} {'type': 'loss', 'content': 0.122186578810215, 'timestamp': '2025-10-01 04:29:50.913857', 'step': 9724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:50.967544', 'step': 9724, 'epoch': 2} {'type': 'loss', 'content': 0.11008767783641815, 'timestamp': '2025-10-01 04:29:50.969569', 'step': 9725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:51.024482', 'step': 9725, 'epoch': 2} {'type': 'loss', 'content': 0.1317959874868393, 'timestamp': '2025-10-01 04:29:51.027821', 'step': 9726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:51.080779', 'step': 9726, 'epoch': 2} {'type': 'loss', 'content': 0.1642879992723465, 'timestamp': '2025-10-01 04:29:51.082916', 'step': 9727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:51.135698', 'step': 9727, 'epoch': 2} {'type': 'loss', 'content': 0.07043446600437164, 'timestamp': '2025-10-01 04:29:51.141476', 'step': 9728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:51.194905', 'step': 9728, 'epoch': 2} {'type': 'loss', 'content': 0.23080049455165863, 'timestamp': '2025-10-01 04:29:51.197416', 'step': 9729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:51.250912', 'step': 9729, 'epoch': 2} {'type': 'loss', 'content': 0.09860214591026306, 'timestamp': '2025-10-01 04:29:51.253365', 'step': 9730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:51.307585', 'step': 9730, 'epoch': 2} {'type': 'loss', 'content': 0.14302366971969604, 'timestamp': '2025-10-01 04:29:51.309790', 'step': 9731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:51.363762', 'step': 9731, 'epoch': 2} {'type': 'loss', 'content': 0.12360868602991104, 'timestamp': '2025-10-01 04:29:51.370524', 'step': 9732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:51.435096', 'step': 9732, 'epoch': 2} {'type': 'loss', 'content': 0.1457456797361374, 'timestamp': '2025-10-01 04:29:51.437118', 'step': 9733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:51.490502', 'step': 9733, 'epoch': 2} {'type': 'loss', 'content': 0.0823802575469017, 'timestamp': '2025-10-01 04:29:51.492669', 'step': 9734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:51.546406', 'step': 9734, 'epoch': 2} {'type': 'loss', 'content': 0.14873307943344116, 'timestamp': '2025-10-01 04:29:51.548578', 'step': 9735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:51.603138', 'step': 9735, 'epoch': 2} {'type': 'loss', 'content': 0.1871088594198227, 'timestamp': '2025-10-01 04:29:51.610228', 'step': 9736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:51.663327', 'step': 9736, 'epoch': 2} {'type': 'loss', 'content': 0.14877989888191223, 'timestamp': '2025-10-01 04:29:51.666188', 'step': 9737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:51.719569', 'step': 9737, 'epoch': 2} {'type': 'loss', 'content': 0.1512020230293274, 'timestamp': '2025-10-01 04:29:51.721577', 'step': 9738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:51.774401', 'step': 9738, 'epoch': 2} {'type': 'loss', 'content': 0.18349777162075043, 'timestamp': '2025-10-01 04:29:51.776996', 'step': 9739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:51.830840', 'step': 9739, 'epoch': 2} {'type': 'loss', 'content': 0.1709672510623932, 'timestamp': '2025-10-01 04:29:51.836965', 'step': 9740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:51.889778', 'step': 9740, 'epoch': 2} {'type': 'loss', 'content': 0.19293467700481415, 'timestamp': '2025-10-01 04:29:51.891900', 'step': 9741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:51.945536', 'step': 9741, 'epoch': 2} {'type': 'loss', 'content': 0.15485845506191254, 'timestamp': '2025-10-01 04:29:51.959276', 'step': 9742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:52.013132', 'step': 9742, 'epoch': 2} {'type': 'loss', 'content': 0.15593957901000977, 'timestamp': '2025-10-01 04:29:52.015543', 'step': 9743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:52.069071', 'step': 9743, 'epoch': 2} {'type': 'loss', 'content': 0.13929839432239532, 'timestamp': '2025-10-01 04:29:52.074907', 'step': 9744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:52.130659', 'step': 9744, 'epoch': 2} {'type': 'loss', 'content': 0.1047973781824112, 'timestamp': '2025-10-01 04:29:52.132574', 'step': 9745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:29:52.204071', 'step': 9745, 'epoch': 2} {'type': 'loss', 'content': 0.05995068699121475, 'timestamp': '2025-10-01 04:29:52.206932', 'step': 9746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:52.260846', 'step': 9746, 'epoch': 2} {'type': 'loss', 'content': 0.09214718639850616, 'timestamp': '2025-10-01 04:29:52.262936', 'step': 9747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:52.331541', 'step': 9747, 'epoch': 2} {'type': 'loss', 'content': 0.1536756157875061, 'timestamp': '2025-10-01 04:29:52.337168', 'step': 9748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:29:52.390163', 'step': 9748, 'epoch': 2} {'type': 'loss', 'content': 0.18239052593708038, 'timestamp': '2025-10-01 04:29:52.392201', 'step': 9749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:52.445031', 'step': 9749, 'epoch': 2} {'type': 'loss', 'content': 0.16190975904464722, 'timestamp': '2025-10-01 04:29:52.447158', 'step': 9750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:52.500508', 'step': 9750, 'epoch': 2} {'type': 'loss', 'content': 0.20247232913970947, 'timestamp': '2025-10-01 04:29:52.502610', 'step': 9751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:52.570916', 'step': 9751, 'epoch': 2} {'type': 'loss', 'content': 0.11270053684711456, 'timestamp': '2025-10-01 04:29:52.576594', 'step': 9752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:52.629235', 'step': 9752, 'epoch': 2} {'type': 'loss', 'content': 0.1582973748445511, 'timestamp': '2025-10-01 04:29:52.631314', 'step': 9753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:52.684227', 'step': 9753, 'epoch': 2} {'type': 'loss', 'content': 0.1175960823893547, 'timestamp': '2025-10-01 04:29:52.686519', 'step': 9754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:52.752264', 'step': 9754, 'epoch': 2} {'type': 'loss', 'content': 0.20523102581501007, 'timestamp': '2025-10-01 04:29:52.754375', 'step': 9755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:52.807724', 'step': 9755, 'epoch': 2} {'type': 'loss', 'content': 0.08610593527555466, 'timestamp': '2025-10-01 04:29:52.813642', 'step': 9756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:52.878969', 'step': 9756, 'epoch': 2} {'type': 'loss', 'content': 0.1219705194234848, 'timestamp': '2025-10-01 04:29:52.880786', 'step': 9757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:52.948316', 'step': 9757, 'epoch': 2} {'type': 'loss', 'content': 0.07107860594987869, 'timestamp': '2025-10-01 04:29:52.950257', 'step': 9758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:53.003937', 'step': 9758, 'epoch': 2} {'type': 'loss', 'content': 0.13416315615177155, 'timestamp': '2025-10-01 04:29:53.006333', 'step': 9759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:53.059146', 'step': 9759, 'epoch': 2} {'type': 'loss', 'content': 0.12660053372383118, 'timestamp': '2025-10-01 04:29:53.065106', 'step': 9760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:53.118497', 'step': 9760, 'epoch': 2} {'type': 'loss', 'content': 0.1093202531337738, 'timestamp': '2025-10-01 04:29:53.120464', 'step': 9761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:53.174105', 'step': 9761, 'epoch': 2} {'type': 'loss', 'content': 0.11694220453500748, 'timestamp': '2025-10-01 04:29:53.176172', 'step': 9762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:53.238064', 'step': 9762, 'epoch': 2} {'type': 'loss', 'content': 0.19498759508132935, 'timestamp': '2025-10-01 04:29:53.240426', 'step': 9763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:53.295440', 'step': 9763, 'epoch': 2} {'type': 'loss', 'content': 0.15242236852645874, 'timestamp': '2025-10-01 04:29:53.301071', 'step': 9764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:53.354450', 'step': 9764, 'epoch': 2} {'type': 'loss', 'content': 0.13669824600219727, 'timestamp': '2025-10-01 04:29:53.356551', 'step': 9765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:53.409288', 'step': 9765, 'epoch': 2} {'type': 'loss', 'content': 0.14581210911273956, 'timestamp': '2025-10-01 04:29:53.411711', 'step': 9766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:53.466508', 'step': 9766, 'epoch': 2} {'type': 'loss', 'content': 0.07963080704212189, 'timestamp': '2025-10-01 04:29:53.469213', 'step': 9767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:53.522279', 'step': 9767, 'epoch': 2} {'type': 'loss', 'content': 0.23305705189704895, 'timestamp': '2025-10-01 04:29:53.528027', 'step': 9768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:53.601119', 'step': 9768, 'epoch': 2} {'type': 'loss', 'content': 0.16983762383460999, 'timestamp': '2025-10-01 04:29:53.603781', 'step': 9769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:53.656985', 'step': 9769, 'epoch': 2} {'type': 'loss', 'content': 0.1813330203294754, 'timestamp': '2025-10-01 04:29:53.658894', 'step': 9770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:53.712090', 'step': 9770, 'epoch': 2} {'type': 'loss', 'content': 0.06435874849557877, 'timestamp': '2025-10-01 04:29:53.714055', 'step': 9771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:53.768007', 'step': 9771, 'epoch': 2} {'type': 'loss', 'content': 0.11132574826478958, 'timestamp': '2025-10-01 04:29:53.773828', 'step': 9772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:29:53.826119', 'step': 9772, 'epoch': 2} {'type': 'loss', 'content': 0.1107301265001297, 'timestamp': '2025-10-01 04:29:53.828057', 'step': 9773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:53.880820', 'step': 9773, 'epoch': 2} {'type': 'loss', 'content': 0.10666211694478989, 'timestamp': '2025-10-01 04:29:53.883034', 'step': 9774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:53.937757', 'step': 9774, 'epoch': 2} {'type': 'loss', 'content': 0.11714643239974976, 'timestamp': '2025-10-01 04:29:53.939786', 'step': 9775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:29:53.992903', 'step': 9775, 'epoch': 2} {'type': 'loss', 'content': 0.10497867316007614, 'timestamp': '2025-10-01 04:29:53.998394', 'step': 9776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:54.051682', 'step': 9776, 'epoch': 2} {'type': 'loss', 'content': 0.07181348651647568, 'timestamp': '2025-10-01 04:29:54.053635', 'step': 9777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:54.106430', 'step': 9777, 'epoch': 2} {'type': 'loss', 'content': 0.07600539922714233, 'timestamp': '2025-10-01 04:29:54.108526', 'step': 9778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:54.161478', 'step': 9778, 'epoch': 2} {'type': 'loss', 'content': 0.15640351176261902, 'timestamp': '2025-10-01 04:29:54.163645', 'step': 9779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:29:54.223908', 'step': 9779, 'epoch': 2} {'type': 'loss', 'content': 0.08124981075525284, 'timestamp': '2025-10-01 04:29:54.232065', 'step': 9780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:54.301007', 'step': 9780, 'epoch': 2} {'type': 'loss', 'content': 0.19983363151550293, 'timestamp': '2025-10-01 04:29:54.303161', 'step': 9781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:54.356254', 'step': 9781, 'epoch': 2} {'type': 'loss', 'content': 0.1427234709262848, 'timestamp': '2025-10-01 04:29:54.358761', 'step': 9782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:29:54.418555', 'step': 9782, 'epoch': 2} {'type': 'loss', 'content': 0.1060875877737999, 'timestamp': '2025-10-01 04:29:54.421482', 'step': 9783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:54.483892', 'step': 9783, 'epoch': 2} {'type': 'loss', 'content': 0.1871623992919922, 'timestamp': '2025-10-01 04:29:54.489410', 'step': 9784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:29:54.546105', 'step': 9784, 'epoch': 2} {'type': 'loss', 'content': 0.20605520904064178, 'timestamp': '2025-10-01 04:29:54.548196', 'step': 9785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:29:54.601267', 'step': 9785, 'epoch': 2} {'type': 'loss', 'content': 0.0790192186832428, 'timestamp': '2025-10-01 04:29:54.603446', 'step': 9786, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-01 04:30:08.252777', 'step': 9786, 'epoch': 2} {'type': 'pplx', 'content': 12817.712189183372, 'timestamp': '2025-10-01 04:30:08.255498', 'step': 9786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:08.309837', 'step': 9786, 'epoch': 2} {'type': 'loss', 'content': 0.10855843871831894, 'timestamp': '2025-10-01 04:30:08.311911', 'step': 9787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:08.365645', 'step': 9787, 'epoch': 2} {'type': 'loss', 'content': 0.08238950371742249, 'timestamp': '2025-10-01 04:30:08.371788', 'step': 9788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:08.425103', 'step': 9788, 'epoch': 2} {'type': 'loss', 'content': 0.12586814165115356, 'timestamp': '2025-10-01 04:30:08.427167', 'step': 9789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:08.480833', 'step': 9789, 'epoch': 2} {'type': 'loss', 'content': 0.08668495714664459, 'timestamp': '2025-10-01 04:30:08.483339', 'step': 9790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:08.545006', 'step': 9790, 'epoch': 2} {'type': 'loss', 'content': 0.10676028579473495, 'timestamp': '2025-10-01 04:30:08.548193', 'step': 9791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:08.606731', 'step': 9791, 'epoch': 2} {'type': 'loss', 'content': 0.19249579310417175, 'timestamp': '2025-10-01 04:30:08.612821', 'step': 9792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:08.665831', 'step': 9792, 'epoch': 2} {'type': 'loss', 'content': 0.10914759337902069, 'timestamp': '2025-10-01 04:30:08.668730', 'step': 9793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:30:08.732701', 'step': 9793, 'epoch': 2} {'type': 'loss', 'content': 0.166696697473526, 'timestamp': '2025-10-01 04:30:08.734885', 'step': 9794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:08.788989', 'step': 9794, 'epoch': 2} {'type': 'loss', 'content': 0.10694251954555511, 'timestamp': '2025-10-01 04:30:08.790948', 'step': 9795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:08.844027', 'step': 9795, 'epoch': 2} {'type': 'loss', 'content': 0.12620767951011658, 'timestamp': '2025-10-01 04:30:08.850052', 'step': 9796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:08.909286', 'step': 9796, 'epoch': 2} {'type': 'loss', 'content': 0.12806063890457153, 'timestamp': '2025-10-01 04:30:08.911569', 'step': 9797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:08.964934', 'step': 9797, 'epoch': 2} {'type': 'loss', 'content': 0.08055891841650009, 'timestamp': '2025-10-01 04:30:08.966925', 'step': 9798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:09.020458', 'step': 9798, 'epoch': 2} {'type': 'loss', 'content': 0.09001407772302628, 'timestamp': '2025-10-01 04:30:09.022639', 'step': 9799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:09.076094', 'step': 9799, 'epoch': 2} {'type': 'loss', 'content': 0.13644374907016754, 'timestamp': '2025-10-01 04:30:09.084801', 'step': 9800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:09.137766', 'step': 9800, 'epoch': 2} {'type': 'loss', 'content': 0.1371215581893921, 'timestamp': '2025-10-01 04:30:09.139935', 'step': 9801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:09.194626', 'step': 9801, 'epoch': 2} {'type': 'loss', 'content': 0.11406152695417404, 'timestamp': '2025-10-01 04:30:09.196766', 'step': 9802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:09.250417', 'step': 9802, 'epoch': 2} {'type': 'loss', 'content': 0.15675146877765656, 'timestamp': '2025-10-01 04:30:09.253240', 'step': 9803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:09.306847', 'step': 9803, 'epoch': 2} {'type': 'loss', 'content': 0.11407685279846191, 'timestamp': '2025-10-01 04:30:09.312492', 'step': 9804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:09.366029', 'step': 9804, 'epoch': 2} {'type': 'loss', 'content': 0.15653187036514282, 'timestamp': '2025-10-01 04:30:09.368116', 'step': 9805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:09.421301', 'step': 9805, 'epoch': 2} {'type': 'loss', 'content': 0.10838858783245087, 'timestamp': '2025-10-01 04:30:09.424679', 'step': 9806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:09.479795', 'step': 9806, 'epoch': 2} {'type': 'loss', 'content': 0.10607637465000153, 'timestamp': '2025-10-01 04:30:09.482085', 'step': 9807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:09.537173', 'step': 9807, 'epoch': 2} {'type': 'loss', 'content': 0.09123006463050842, 'timestamp': '2025-10-01 04:30:09.542894', 'step': 9808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:09.596716', 'step': 9808, 'epoch': 2} {'type': 'loss', 'content': 0.1618996411561966, 'timestamp': '2025-10-01 04:30:09.598881', 'step': 9809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:09.652008', 'step': 9809, 'epoch': 2} {'type': 'loss', 'content': 0.09623751044273376, 'timestamp': '2025-10-01 04:30:09.654277', 'step': 9810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:09.710172', 'step': 9810, 'epoch': 2} {'type': 'loss', 'content': 0.2301626354455948, 'timestamp': '2025-10-01 04:30:09.712502', 'step': 9811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:09.767688', 'step': 9811, 'epoch': 2} {'type': 'loss', 'content': 0.05386875197291374, 'timestamp': '2025-10-01 04:30:09.773298', 'step': 9812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:09.834409', 'step': 9812, 'epoch': 2} {'type': 'loss', 'content': 0.08149727433919907, 'timestamp': '2025-10-01 04:30:09.836593', 'step': 9813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:30:09.889955', 'step': 9813, 'epoch': 2} {'type': 'loss', 'content': 0.0606660321354866, 'timestamp': '2025-10-01 04:30:09.892196', 'step': 9814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:09.947290', 'step': 9814, 'epoch': 2} {'type': 'loss', 'content': 0.12838633358478546, 'timestamp': '2025-10-01 04:30:09.953983', 'step': 9815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:10.007578', 'step': 9815, 'epoch': 2} {'type': 'loss', 'content': 0.12322304397821426, 'timestamp': '2025-10-01 04:30:10.013235', 'step': 9816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:10.065984', 'step': 9816, 'epoch': 2} {'type': 'loss', 'content': 0.1264946609735489, 'timestamp': '2025-10-01 04:30:10.068382', 'step': 9817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:10.121201', 'step': 9817, 'epoch': 2} {'type': 'loss', 'content': 0.08018451929092407, 'timestamp': '2025-10-01 04:30:10.123462', 'step': 9818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:30:10.176491', 'step': 9818, 'epoch': 2} {'type': 'loss', 'content': 0.13251395523548126, 'timestamp': '2025-10-01 04:30:10.187440', 'step': 9819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:10.241026', 'step': 9819, 'epoch': 2} {'type': 'loss', 'content': 0.16004304587841034, 'timestamp': '2025-10-01 04:30:10.246923', 'step': 9820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:10.300210', 'step': 9820, 'epoch': 2} {'type': 'loss', 'content': 0.16723836958408356, 'timestamp': '2025-10-01 04:30:10.302581', 'step': 9821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:10.355654', 'step': 9821, 'epoch': 2} {'type': 'loss', 'content': 0.17168952524662018, 'timestamp': '2025-10-01 04:30:10.357780', 'step': 9822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:10.411873', 'step': 9822, 'epoch': 2} {'type': 'loss', 'content': 0.12346433103084564, 'timestamp': '2025-10-01 04:30:10.414001', 'step': 9823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:30:10.467284', 'step': 9823, 'epoch': 2} {'type': 'loss', 'content': 0.32807841897010803, 'timestamp': '2025-10-01 04:30:10.472834', 'step': 9824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:10.525604', 'step': 9824, 'epoch': 2} {'type': 'loss', 'content': 0.1742912083864212, 'timestamp': '2025-10-01 04:30:10.527683', 'step': 9825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:10.580611', 'step': 9825, 'epoch': 2} {'type': 'loss', 'content': 0.22043567895889282, 'timestamp': '2025-10-01 04:30:10.584723', 'step': 9826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:10.638363', 'step': 9826, 'epoch': 2} {'type': 'loss', 'content': 0.15232515335083008, 'timestamp': '2025-10-01 04:30:10.640354', 'step': 9827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:10.693177', 'step': 9827, 'epoch': 2} {'type': 'loss', 'content': 0.13894832134246826, 'timestamp': '2025-10-01 04:30:10.698887', 'step': 9828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:10.753084', 'step': 9828, 'epoch': 2} {'type': 'loss', 'content': 0.1749107986688614, 'timestamp': '2025-10-01 04:30:10.756599', 'step': 9829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:10.809576', 'step': 9829, 'epoch': 2} {'type': 'loss', 'content': 0.08266161382198334, 'timestamp': '2025-10-01 04:30:10.811889', 'step': 9830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:10.865192', 'step': 9830, 'epoch': 2} {'type': 'loss', 'content': 0.07589031755924225, 'timestamp': '2025-10-01 04:30:10.867211', 'step': 9831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:10.920919', 'step': 9831, 'epoch': 2} {'type': 'loss', 'content': 0.09821097552776337, 'timestamp': '2025-10-01 04:30:10.926817', 'step': 9832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:10.979906', 'step': 9832, 'epoch': 2} {'type': 'loss', 'content': 0.15288691222667694, 'timestamp': '2025-10-01 04:30:10.982024', 'step': 9833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:11.034742', 'step': 9833, 'epoch': 2} {'type': 'loss', 'content': 0.13881705701351166, 'timestamp': '2025-10-01 04:30:11.036693', 'step': 9834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:11.090006', 'step': 9834, 'epoch': 2} {'type': 'loss', 'content': 0.11002261191606522, 'timestamp': '2025-10-01 04:30:11.091993', 'step': 9835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:11.144305', 'step': 9835, 'epoch': 2} {'type': 'loss', 'content': 0.17267262935638428, 'timestamp': '2025-10-01 04:30:11.150332', 'step': 9836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:11.207991', 'step': 9836, 'epoch': 2} {'type': 'loss', 'content': 0.22847004234790802, 'timestamp': '2025-10-01 04:30:11.209762', 'step': 9837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:11.263115', 'step': 9837, 'epoch': 2} {'type': 'loss', 'content': 0.1660555750131607, 'timestamp': '2025-10-01 04:30:11.265193', 'step': 9838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:11.319151', 'step': 9838, 'epoch': 2} {'type': 'loss', 'content': 0.10463778674602509, 'timestamp': '2025-10-01 04:30:11.321306', 'step': 9839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:11.374399', 'step': 9839, 'epoch': 2} {'type': 'loss', 'content': 0.08194958418607712, 'timestamp': '2025-10-01 04:30:11.380692', 'step': 9840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:11.433838', 'step': 9840, 'epoch': 2} {'type': 'loss', 'content': 0.09256184846162796, 'timestamp': '2025-10-01 04:30:11.436008', 'step': 9841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:30:11.489622', 'step': 9841, 'epoch': 2} {'type': 'loss', 'content': 0.1643826961517334, 'timestamp': '2025-10-01 04:30:11.491583', 'step': 9842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:11.544940', 'step': 9842, 'epoch': 2} {'type': 'loss', 'content': 0.1784348338842392, 'timestamp': '2025-10-01 04:30:11.547076', 'step': 9843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:30:11.600532', 'step': 9843, 'epoch': 2} {'type': 'loss', 'content': 0.23566681146621704, 'timestamp': '2025-10-01 04:30:11.605961', 'step': 9844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:11.658722', 'step': 9844, 'epoch': 2} {'type': 'loss', 'content': 0.12832489609718323, 'timestamp': '2025-10-01 04:30:11.660692', 'step': 9845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:11.713628', 'step': 9845, 'epoch': 2} {'type': 'loss', 'content': 0.16696816682815552, 'timestamp': '2025-10-01 04:30:11.715796', 'step': 9846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:11.769219', 'step': 9846, 'epoch': 2} {'type': 'loss', 'content': 0.13030654191970825, 'timestamp': '2025-10-01 04:30:11.771178', 'step': 9847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:11.824045', 'step': 9847, 'epoch': 2} {'type': 'loss', 'content': 0.13482582569122314, 'timestamp': '2025-10-01 04:30:11.829801', 'step': 9848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:11.883365', 'step': 9848, 'epoch': 2} {'type': 'loss', 'content': 0.15477603673934937, 'timestamp': '2025-10-01 04:30:11.892744', 'step': 9849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:11.947019', 'step': 9849, 'epoch': 2} {'type': 'loss', 'content': 0.16003915667533875, 'timestamp': '2025-10-01 04:30:11.949255', 'step': 9850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:30:12.002320', 'step': 9850, 'epoch': 2} {'type': 'loss', 'content': 0.2184995859861374, 'timestamp': '2025-10-01 04:30:12.004463', 'step': 9851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:12.066162', 'step': 9851, 'epoch': 2} {'type': 'loss', 'content': 0.18277674913406372, 'timestamp': '2025-10-01 04:30:12.071548', 'step': 9852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:12.124014', 'step': 9852, 'epoch': 2} {'type': 'loss', 'content': 0.20805731415748596, 'timestamp': '2025-10-01 04:30:12.126137', 'step': 9853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:12.179370', 'step': 9853, 'epoch': 2} {'type': 'loss', 'content': 0.10166125744581223, 'timestamp': '2025-10-01 04:30:12.181538', 'step': 9854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:12.234715', 'step': 9854, 'epoch': 2} {'type': 'loss', 'content': 0.17440800368785858, 'timestamp': '2025-10-01 04:30:12.236832', 'step': 9855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:12.300838', 'step': 9855, 'epoch': 2} {'type': 'loss', 'content': 0.180135115981102, 'timestamp': '2025-10-01 04:30:12.306612', 'step': 9856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:12.359428', 'step': 9856, 'epoch': 2} {'type': 'loss', 'content': 0.0778154730796814, 'timestamp': '2025-10-01 04:30:12.363015', 'step': 9857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:12.422085', 'step': 9857, 'epoch': 2} {'type': 'loss', 'content': 0.09228029102087021, 'timestamp': '2025-10-01 04:30:12.424904', 'step': 9858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:12.478658', 'step': 9858, 'epoch': 2} {'type': 'loss', 'content': 0.2064424306154251, 'timestamp': '2025-10-01 04:30:12.480766', 'step': 9859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:30:12.534336', 'step': 9859, 'epoch': 2} {'type': 'loss', 'content': 0.0968250259757042, 'timestamp': '2025-10-01 04:30:12.540257', 'step': 9860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:12.603003', 'step': 9860, 'epoch': 2} {'type': 'loss', 'content': 0.14569750428199768, 'timestamp': '2025-10-01 04:30:12.605256', 'step': 9861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:12.658449', 'step': 9861, 'epoch': 2} {'type': 'loss', 'content': 0.09732893109321594, 'timestamp': '2025-10-01 04:30:12.660806', 'step': 9862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:12.724996', 'step': 9862, 'epoch': 2} {'type': 'loss', 'content': 0.14413772523403168, 'timestamp': '2025-10-01 04:30:12.727203', 'step': 9863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:12.786262', 'step': 9863, 'epoch': 2} {'type': 'loss', 'content': 0.12086737155914307, 'timestamp': '2025-10-01 04:30:12.792099', 'step': 9864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:12.844854', 'step': 9864, 'epoch': 2} {'type': 'loss', 'content': 0.0937054380774498, 'timestamp': '2025-10-01 04:30:12.847102', 'step': 9865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:12.910865', 'step': 9865, 'epoch': 2} {'type': 'loss', 'content': 0.11334791034460068, 'timestamp': '2025-10-01 04:30:12.913718', 'step': 9866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:12.968748', 'step': 9866, 'epoch': 2} {'type': 'loss', 'content': 0.13177411258220673, 'timestamp': '2025-10-01 04:30:12.971198', 'step': 9867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:13.024705', 'step': 9867, 'epoch': 2} {'type': 'loss', 'content': 0.07310350984334946, 'timestamp': '2025-10-01 04:30:13.030444', 'step': 9868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:13.083238', 'step': 9868, 'epoch': 2} {'type': 'loss', 'content': 0.17590607702732086, 'timestamp': '2025-10-01 04:30:13.085882', 'step': 9869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:13.139999', 'step': 9869, 'epoch': 2} {'type': 'loss', 'content': 0.16544675827026367, 'timestamp': '2025-10-01 04:30:13.141891', 'step': 9870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:13.194984', 'step': 9870, 'epoch': 2} {'type': 'loss', 'content': 0.22086867690086365, 'timestamp': '2025-10-01 04:30:13.197211', 'step': 9871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:13.250123', 'step': 9871, 'epoch': 2} {'type': 'loss', 'content': 0.20887920260429382, 'timestamp': '2025-10-01 04:30:13.267913', 'step': 9872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:13.320292', 'step': 9872, 'epoch': 2} {'type': 'loss', 'content': 0.13715626299381256, 'timestamp': '2025-10-01 04:30:13.322781', 'step': 9873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:13.376041', 'step': 9873, 'epoch': 2} {'type': 'loss', 'content': 0.13223053514957428, 'timestamp': '2025-10-01 04:30:13.379402', 'step': 9874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:13.448053', 'step': 9874, 'epoch': 2} {'type': 'loss', 'content': 0.19002164900302887, 'timestamp': '2025-10-01 04:30:13.450253', 'step': 9875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:30:13.504634', 'step': 9875, 'epoch': 2} {'type': 'loss', 'content': 0.19805458188056946, 'timestamp': '2025-10-01 04:30:13.521908', 'step': 9876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:30:13.574232', 'step': 9876, 'epoch': 2} {'type': 'loss', 'content': 0.16111966967582703, 'timestamp': '2025-10-01 04:30:13.576299', 'step': 9877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:13.629815', 'step': 9877, 'epoch': 2} {'type': 'loss', 'content': 0.2442653477191925, 'timestamp': '2025-10-01 04:30:13.632056', 'step': 9878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:13.686077', 'step': 9878, 'epoch': 2} {'type': 'loss', 'content': 0.1266806572675705, 'timestamp': '2025-10-01 04:30:13.688384', 'step': 9879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:13.741671', 'step': 9879, 'epoch': 2} {'type': 'loss', 'content': 0.11724469065666199, 'timestamp': '2025-10-01 04:30:13.747349', 'step': 9880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:13.800383', 'step': 9880, 'epoch': 2} {'type': 'loss', 'content': 0.15977731347084045, 'timestamp': '2025-10-01 04:30:13.802597', 'step': 9881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:13.856117', 'step': 9881, 'epoch': 2} {'type': 'loss', 'content': 0.12430708855390549, 'timestamp': '2025-10-01 04:30:13.858310', 'step': 9882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:13.911766', 'step': 9882, 'epoch': 2} {'type': 'loss', 'content': 0.15360815823078156, 'timestamp': '2025-10-01 04:30:13.913898', 'step': 9883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:13.966804', 'step': 9883, 'epoch': 2} {'type': 'loss', 'content': 0.09843140840530396, 'timestamp': '2025-10-01 04:30:13.980999', 'step': 9884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:14.033932', 'step': 9884, 'epoch': 2} {'type': 'loss', 'content': 0.167417511343956, 'timestamp': '2025-10-01 04:30:14.036041', 'step': 9885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:14.089345', 'step': 9885, 'epoch': 2} {'type': 'loss', 'content': 0.1432536095380783, 'timestamp': '2025-10-01 04:30:14.091284', 'step': 9886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:14.144729', 'step': 9886, 'epoch': 2} {'type': 'loss', 'content': 0.08709751069545746, 'timestamp': '2025-10-01 04:30:14.146950', 'step': 9887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:14.200591', 'step': 9887, 'epoch': 2} {'type': 'loss', 'content': 0.19817039370536804, 'timestamp': '2025-10-01 04:30:14.206280', 'step': 9888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:14.267505', 'step': 9888, 'epoch': 2} {'type': 'loss', 'content': 0.09303954243659973, 'timestamp': '2025-10-01 04:30:14.269449', 'step': 9889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:30:14.323571', 'step': 9889, 'epoch': 2} {'type': 'loss', 'content': 0.09647836536169052, 'timestamp': '2025-10-01 04:30:14.334326', 'step': 9890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:14.388455', 'step': 9890, 'epoch': 2} {'type': 'loss', 'content': 0.15619491040706635, 'timestamp': '2025-10-01 04:30:14.391123', 'step': 9891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:14.449208', 'step': 9891, 'epoch': 2} {'type': 'loss', 'content': 0.1336349993944168, 'timestamp': '2025-10-01 04:30:14.456527', 'step': 9892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:14.521730', 'step': 9892, 'epoch': 2} {'type': 'loss', 'content': 0.10783438384532928, 'timestamp': '2025-10-01 04:30:14.524243', 'step': 9893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:14.579409', 'step': 9893, 'epoch': 2} {'type': 'loss', 'content': 0.09541390091180801, 'timestamp': '2025-10-01 04:30:14.585739', 'step': 9894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:14.651720', 'step': 9894, 'epoch': 2} {'type': 'loss', 'content': 0.22377535700798035, 'timestamp': '2025-10-01 04:30:14.655109', 'step': 9895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:14.710697', 'step': 9895, 'epoch': 2} {'type': 'loss', 'content': 0.08052918314933777, 'timestamp': '2025-10-01 04:30:14.717039', 'step': 9896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:14.777911', 'step': 9896, 'epoch': 2} {'type': 'loss', 'content': 0.08484197407960892, 'timestamp': '2025-10-01 04:30:14.781429', 'step': 9897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:14.841068', 'step': 9897, 'epoch': 2} {'type': 'loss', 'content': 0.14243359863758087, 'timestamp': '2025-10-01 04:30:14.843471', 'step': 9898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:14.908184', 'step': 9898, 'epoch': 2} {'type': 'loss', 'content': 0.12522725760936737, 'timestamp': '2025-10-01 04:30:14.910583', 'step': 9899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:14.964352', 'step': 9899, 'epoch': 2} {'type': 'loss', 'content': 0.1466650664806366, 'timestamp': '2025-10-01 04:30:14.974622', 'step': 9900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:15.036529', 'step': 9900, 'epoch': 2} {'type': 'loss', 'content': 0.1794581562280655, 'timestamp': '2025-10-01 04:30:15.038743', 'step': 9901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:15.092291', 'step': 9901, 'epoch': 2} {'type': 'loss', 'content': 0.16325068473815918, 'timestamp': '2025-10-01 04:30:15.096940', 'step': 9902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:30:15.152020', 'step': 9902, 'epoch': 2} {'type': 'loss', 'content': 0.1435554027557373, 'timestamp': '2025-10-01 04:30:15.162729', 'step': 9903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:15.216308', 'step': 9903, 'epoch': 2} {'type': 'loss', 'content': 0.19056695699691772, 'timestamp': '2025-10-01 04:30:15.224991', 'step': 9904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:30:15.277942', 'step': 9904, 'epoch': 2} {'type': 'loss', 'content': 0.025548266246914864, 'timestamp': '2025-10-01 04:30:15.280245', 'step': 9905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:15.332938', 'step': 9905, 'epoch': 2} {'type': 'loss', 'content': 0.1361989974975586, 'timestamp': '2025-10-01 04:30:15.338804', 'step': 9906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:15.394167', 'step': 9906, 'epoch': 2} {'type': 'loss', 'content': 0.0891495868563652, 'timestamp': '2025-10-01 04:30:15.398412', 'step': 9907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:15.452132', 'step': 9907, 'epoch': 2} {'type': 'loss', 'content': 0.14980150759220123, 'timestamp': '2025-10-01 04:30:15.458452', 'step': 9908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:15.520240', 'step': 9908, 'epoch': 2} {'type': 'loss', 'content': 0.09917937964200974, 'timestamp': '2025-10-01 04:30:15.522474', 'step': 9909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:15.575306', 'step': 9909, 'epoch': 2} {'type': 'loss', 'content': 0.14011724293231964, 'timestamp': '2025-10-01 04:30:15.580659', 'step': 9910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:15.637039', 'step': 9910, 'epoch': 2} {'type': 'loss', 'content': 0.09377475082874298, 'timestamp': '2025-10-01 04:30:15.642841', 'step': 9911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:15.704468', 'step': 9911, 'epoch': 2} {'type': 'loss', 'content': 0.147988960146904, 'timestamp': '2025-10-01 04:30:15.710610', 'step': 9912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:15.764355', 'step': 9912, 'epoch': 2} {'type': 'loss', 'content': 0.13865432143211365, 'timestamp': '2025-10-01 04:30:15.766649', 'step': 9913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:15.820502', 'step': 9913, 'epoch': 2} {'type': 'loss', 'content': 0.07385004311800003, 'timestamp': '2025-10-01 04:30:15.822955', 'step': 9914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:15.877581', 'step': 9914, 'epoch': 2} {'type': 'loss', 'content': 0.13087216019630432, 'timestamp': '2025-10-01 04:30:15.879957', 'step': 9915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:15.935194', 'step': 9915, 'epoch': 2} {'type': 'loss', 'content': 0.1357036828994751, 'timestamp': '2025-10-01 04:30:15.941300', 'step': 9916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:15.996477', 'step': 9916, 'epoch': 2} {'type': 'loss', 'content': 0.13350029289722443, 'timestamp': '2025-10-01 04:30:15.998924', 'step': 9917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:16.054563', 'step': 9917, 'epoch': 2} {'type': 'loss', 'content': 0.11496514827013016, 'timestamp': '2025-10-01 04:30:16.057515', 'step': 9918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:16.114451', 'step': 9918, 'epoch': 2} {'type': 'loss', 'content': 0.15472114086151123, 'timestamp': '2025-10-01 04:30:16.118503', 'step': 9919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:16.176126', 'step': 9919, 'epoch': 2} {'type': 'loss', 'content': 0.10553814470767975, 'timestamp': '2025-10-01 04:30:16.182877', 'step': 9920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:16.240800', 'step': 9920, 'epoch': 2} {'type': 'loss', 'content': 0.12989413738250732, 'timestamp': '2025-10-01 04:30:16.244166', 'step': 9921, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:16.303069', 'step': 9921, 'epoch': 2} {'type': 'loss', 'content': 0.13045847415924072, 'timestamp': '2025-10-01 04:30:16.305467', 'step': 9922, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:16.361594', 'step': 9922, 'epoch': 2} {'type': 'loss', 'content': 0.1843535155057907, 'timestamp': '2025-10-01 04:30:16.366198', 'step': 9923, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:16.423158', 'step': 9923, 'epoch': 2} {'type': 'loss', 'content': 0.11706952750682831, 'timestamp': '2025-10-01 04:30:16.430282', 'step': 9924, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:30:16.494960', 'step': 9924, 'epoch': 2} {'type': 'loss', 'content': 0.15048781037330627, 'timestamp': '2025-10-01 04:30:16.500064', 'step': 9925, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:16.557763', 'step': 9925, 'epoch': 2} {'type': 'loss', 'content': 0.10655857622623444, 'timestamp': '2025-10-01 04:30:16.560095', 'step': 9926, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:16.617449', 'step': 9926, 'epoch': 2} {'type': 'loss', 'content': 0.08753186464309692, 'timestamp': '2025-10-01 04:30:16.619798', 'step': 9927, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:16.675137', 'step': 9927, 'epoch': 2} {'type': 'loss', 'content': 0.18219149112701416, 'timestamp': '2025-10-01 04:30:16.683527', 'step': 9928, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:16.736870', 'step': 9928, 'epoch': 2} {'type': 'loss', 'content': 0.09659888595342636, 'timestamp': '2025-10-01 04:30:16.739026', 'step': 9929, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:16.792219', 'step': 9929, 'epoch': 2} {'type': 'loss', 'content': 0.266304075717926, 'timestamp': '2025-10-01 04:30:16.794544', 'step': 9930, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:16.848278', 'step': 9930, 'epoch': 2} {'type': 'loss', 'content': 0.1081865057349205, 'timestamp': '2025-10-01 04:30:16.851020', 'step': 9931, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:16.903896', 'step': 9931, 'epoch': 2} {'type': 'loss', 'content': 0.10501890629529953, 'timestamp': '2025-10-01 04:30:16.909638', 'step': 9932, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:16.971851', 'step': 9932, 'epoch': 2} {'type': 'loss', 'content': 0.09185991436243057, 'timestamp': '2025-10-01 04:30:16.973962', 'step': 9933, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:17.026794', 'step': 9933, 'epoch': 2} {'type': 'loss', 'content': 0.17275886237621307, 'timestamp': '2025-10-01 04:30:17.029786', 'step': 9934, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:30:17.089337', 'step': 9934, 'epoch': 2} {'type': 'loss', 'content': 0.07471711188554764, 'timestamp': '2025-10-01 04:30:17.091976', 'step': 9935, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:17.145798', 'step': 9935, 'epoch': 2} {'type': 'loss', 'content': 0.1347011923789978, 'timestamp': '2025-10-01 04:30:17.154690', 'step': 9936, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:17.207678', 'step': 9936, 'epoch': 2} {'type': 'loss', 'content': 0.17451994121074677, 'timestamp': '2025-10-01 04:30:17.219754', 'step': 9937, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:17.274326', 'step': 9937, 'epoch': 2} {'type': 'loss', 'content': 0.06702467799186707, 'timestamp': '2025-10-01 04:30:17.285867', 'step': 9938, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:17.352780', 'step': 9938, 'epoch': 2} {'type': 'loss', 'content': 0.13364244997501373, 'timestamp': '2025-10-01 04:30:17.354908', 'step': 9939, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:17.408435', 'step': 9939, 'epoch': 2} {'type': 'loss', 'content': 0.07313187420368195, 'timestamp': '2025-10-01 04:30:17.414704', 'step': 9940, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:17.467813', 'step': 9940, 'epoch': 2} {'type': 'loss', 'content': 0.2046738564968109, 'timestamp': '2025-10-01 04:30:17.470209', 'step': 9941, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:30:17.523835', 'step': 9941, 'epoch': 2} {'type': 'loss', 'content': 0.10059475153684616, 'timestamp': '2025-10-01 04:30:17.526026', 'step': 9942, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:17.579078', 'step': 9942, 'epoch': 2} {'type': 'loss', 'content': 0.16937389969825745, 'timestamp': '2025-10-01 04:30:17.581477', 'step': 9943, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:17.635138', 'step': 9943, 'epoch': 2} {'type': 'loss', 'content': 0.15367485582828522, 'timestamp': '2025-10-01 04:30:17.640716', 'step': 9944, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:17.693438', 'step': 9944, 'epoch': 2} {'type': 'loss', 'content': 0.17226487398147583, 'timestamp': '2025-10-01 04:30:17.695897', 'step': 9945, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:17.748563', 'step': 9945, 'epoch': 2} {'type': 'loss', 'content': 0.17363862693309784, 'timestamp': '2025-10-01 04:30:17.756499', 'step': 9946, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:17.809401', 'step': 9946, 'epoch': 2} {'type': 'loss', 'content': 0.08491503447294235, 'timestamp': '2025-10-01 04:30:17.812129', 'step': 9947, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:17.865183', 'step': 9947, 'epoch': 2} {'type': 'loss', 'content': 0.15849436819553375, 'timestamp': '2025-10-01 04:30:17.870847', 'step': 9948, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:17.923716', 'step': 9948, 'epoch': 2} {'type': 'loss', 'content': 0.14271658658981323, 'timestamp': '2025-10-01 04:30:17.925964', 'step': 9949, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:17.980290', 'step': 9949, 'epoch': 2} {'type': 'loss', 'content': 0.12178437411785126, 'timestamp': '2025-10-01 04:30:17.982684', 'step': 9950, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:18.036064', 'step': 9950, 'epoch': 2} {'type': 'loss', 'content': 0.1494404524564743, 'timestamp': '2025-10-01 04:30:18.038460', 'step': 9951, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:18.093285', 'step': 9951, 'epoch': 2} {'type': 'loss', 'content': 0.0981183648109436, 'timestamp': '2025-10-01 04:30:18.100761', 'step': 9952, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:18.154225', 'step': 9952, 'epoch': 2} {'type': 'loss', 'content': 0.07725992053747177, 'timestamp': '2025-10-01 04:30:18.156351', 'step': 9953, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:18.210775', 'step': 9953, 'epoch': 2} {'type': 'loss', 'content': 0.232722207903862, 'timestamp': '2025-10-01 04:30:18.216663', 'step': 9954, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:18.270297', 'step': 9954, 'epoch': 2} {'type': 'loss', 'content': 0.18890799582004547, 'timestamp': '2025-10-01 04:30:18.272441', 'step': 9955, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:18.326018', 'step': 9955, 'epoch': 2} {'type': 'loss', 'content': 0.09076525270938873, 'timestamp': '2025-10-01 04:30:18.333220', 'step': 9956, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:18.386105', 'step': 9956, 'epoch': 2} {'type': 'loss', 'content': 0.13253414630889893, 'timestamp': '2025-10-01 04:30:18.388881', 'step': 9957, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:30:18.445882', 'step': 9957, 'epoch': 2} {'type': 'loss', 'content': 0.13428185880184174, 'timestamp': '2025-10-01 04:30:18.447915', 'step': 9958, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:18.500844', 'step': 9958, 'epoch': 2} {'type': 'loss', 'content': 0.08335225284099579, 'timestamp': '2025-10-01 04:30:18.502831', 'step': 9959, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:18.555569', 'step': 9959, 'epoch': 2} {'type': 'loss', 'content': 0.16350838541984558, 'timestamp': '2025-10-01 04:30:18.561223', 'step': 9960, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:18.613442', 'step': 9960, 'epoch': 2} {'type': 'loss', 'content': 0.14333879947662354, 'timestamp': '2025-10-01 04:30:18.615979', 'step': 9961, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:18.670621', 'step': 9961, 'epoch': 2} {'type': 'loss', 'content': 0.17338602244853973, 'timestamp': '2025-10-01 04:30:18.672809', 'step': 9962, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:18.726354', 'step': 9962, 'epoch': 2} {'type': 'loss', 'content': 0.08479837328195572, 'timestamp': '2025-10-01 04:30:18.728664', 'step': 9963, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:18.782033', 'step': 9963, 'epoch': 2} {'type': 'loss', 'content': 0.10141277313232422, 'timestamp': '2025-10-01 04:30:18.788178', 'step': 9964, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:18.840993', 'step': 9964, 'epoch': 2} {'type': 'loss', 'content': 0.21207071840763092, 'timestamp': '2025-10-01 04:30:18.843480', 'step': 9965, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:18.896641', 'step': 9965, 'epoch': 2} {'type': 'loss', 'content': 0.09102721512317657, 'timestamp': '2025-10-01 04:30:18.898785', 'step': 9966, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:18.951794', 'step': 9966, 'epoch': 2} {'type': 'loss', 'content': 0.07470649480819702, 'timestamp': '2025-10-01 04:30:18.958203', 'step': 9967, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:19.017452', 'step': 9967, 'epoch': 2} {'type': 'loss', 'content': 0.237068310379982, 'timestamp': '2025-10-01 04:30:19.023101', 'step': 9968, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:19.076612', 'step': 9968, 'epoch': 2} {'type': 'loss', 'content': 0.05634383112192154, 'timestamp': '2025-10-01 04:30:19.078862', 'step': 9969, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:19.132408', 'step': 9969, 'epoch': 2} {'type': 'loss', 'content': 0.10827592015266418, 'timestamp': '2025-10-01 04:30:19.134338', 'step': 9970, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:19.187705', 'step': 9970, 'epoch': 2} {'type': 'loss', 'content': 0.1567559689283371, 'timestamp': '2025-10-01 04:30:19.189989', 'step': 9971, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:19.242707', 'step': 9971, 'epoch': 2} {'type': 'loss', 'content': 0.12544572353363037, 'timestamp': '2025-10-01 04:30:19.248447', 'step': 9972, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:19.300958', 'step': 9972, 'epoch': 2} {'type': 'loss', 'content': 0.2403073012828827, 'timestamp': '2025-10-01 04:30:19.303020', 'step': 9973, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:19.357232', 'step': 9973, 'epoch': 2} {'type': 'loss', 'content': 0.12358270585536957, 'timestamp': '2025-10-01 04:30:19.363506', 'step': 9974, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:19.425795', 'step': 9974, 'epoch': 2} {'type': 'loss', 'content': 0.13819068670272827, 'timestamp': '2025-10-01 04:30:19.428127', 'step': 9975, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:19.484138', 'step': 9975, 'epoch': 2} {'type': 'loss', 'content': 0.137920081615448, 'timestamp': '2025-10-01 04:30:19.489906', 'step': 9976, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:19.542688', 'step': 9976, 'epoch': 2} {'type': 'loss', 'content': 0.1512559950351715, 'timestamp': '2025-10-01 04:30:19.545108', 'step': 9977, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:19.600164', 'step': 9977, 'epoch': 2} {'type': 'loss', 'content': 0.09005166590213776, 'timestamp': '2025-10-01 04:30:19.604202', 'step': 9978, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:19.671158', 'step': 9978, 'epoch': 2} {'type': 'loss', 'content': 0.14035648107528687, 'timestamp': '2025-10-01 04:30:19.673429', 'step': 9979, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:19.730013', 'step': 9979, 'epoch': 2} {'type': 'loss', 'content': 0.10674615204334259, 'timestamp': '2025-10-01 04:30:19.735876', 'step': 9980, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:19.788794', 'step': 9980, 'epoch': 2} {'type': 'loss', 'content': 0.22050561010837555, 'timestamp': '2025-10-01 04:30:19.791047', 'step': 9981, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:19.844709', 'step': 9981, 'epoch': 2} {'type': 'loss', 'content': 0.07155033946037292, 'timestamp': '2025-10-01 04:30:19.846914', 'step': 9982, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:19.900414', 'step': 9982, 'epoch': 2} {'type': 'loss', 'content': 0.09607807546854019, 'timestamp': '2025-10-01 04:30:19.903176', 'step': 9983, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:19.956414', 'step': 9983, 'epoch': 2} {'type': 'loss', 'content': 0.11069490760564804, 'timestamp': '2025-10-01 04:30:19.962047', 'step': 9984, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:20.015301', 'step': 9984, 'epoch': 2} {'type': 'loss', 'content': 0.16218899190425873, 'timestamp': '2025-10-01 04:30:20.017373', 'step': 9985, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:20.071272', 'step': 9985, 'epoch': 2} {'type': 'loss', 'content': 0.0881570503115654, 'timestamp': '2025-10-01 04:30:20.073445', 'step': 9986, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:20.127247', 'step': 9986, 'epoch': 2} {'type': 'loss', 'content': 0.15348736941814423, 'timestamp': '2025-10-01 04:30:20.129468', 'step': 9987, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:20.183151', 'step': 9987, 'epoch': 2} {'type': 'loss', 'content': 0.15847505629062653, 'timestamp': '2025-10-01 04:30:20.189700', 'step': 9988, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:20.242660', 'step': 9988, 'epoch': 2} {'type': 'loss', 'content': 0.15075016021728516, 'timestamp': '2025-10-01 04:30:20.245150', 'step': 9989, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:20.299012', 'step': 9989, 'epoch': 2} {'type': 'loss', 'content': 0.16406139731407166, 'timestamp': '2025-10-01 04:30:20.301504', 'step': 9990, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:20.354908', 'step': 9990, 'epoch': 2} {'type': 'loss', 'content': 0.16664814949035645, 'timestamp': '2025-10-01 04:30:20.357195', 'step': 9991, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:20.410993', 'step': 9991, 'epoch': 2} {'type': 'loss', 'content': 0.13771626353263855, 'timestamp': '2025-10-01 04:30:20.416623', 'step': 9992, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:20.469099', 'step': 9992, 'epoch': 2} {'type': 'loss', 'content': 0.11172130703926086, 'timestamp': '2025-10-01 04:30:20.471166', 'step': 9993, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:20.524882', 'step': 9993, 'epoch': 2} {'type': 'loss', 'content': 0.10016202181577682, 'timestamp': '2025-10-01 04:30:20.527499', 'step': 9994, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:20.594727', 'step': 9994, 'epoch': 2} {'type': 'loss', 'content': 0.12164151668548584, 'timestamp': '2025-10-01 04:30:20.596983', 'step': 9995, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:20.650322', 'step': 9995, 'epoch': 2} {'type': 'loss', 'content': 0.17634405195713043, 'timestamp': '2025-10-01 04:30:20.656024', 'step': 9996, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:20.712892', 'step': 9996, 'epoch': 2} {'type': 'loss', 'content': 0.060828424990177155, 'timestamp': '2025-10-01 04:30:20.715119', 'step': 9997, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:20.769712', 'step': 9997, 'epoch': 2} {'type': 'loss', 'content': 0.06997215747833252, 'timestamp': '2025-10-01 04:30:20.772376', 'step': 9998, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:20.826137', 'step': 9998, 'epoch': 2} {'type': 'loss', 'content': 0.16424983739852905, 'timestamp': '2025-10-01 04:30:20.828319', 'step': 9999, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:20.881855', 'step': 9999, 'epoch': 2} {'type': 'loss', 'content': 0.10524281859397888, 'timestamp': '2025-10-01 04:30:20.887550', 'step': 10000, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 10000', 'timestamp': '2025-10-01 04:30:21.260995', 'step': 10000, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:21.315406', 'step': 10000, 'epoch': 2} {'type': 'loss', 'content': 0.16597360372543335, 'timestamp': '2025-10-01 04:30:21.317601', 'step': 10001, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:21.370988', 'step': 10001, 'epoch': 2} {'type': 'loss', 'content': 0.07727883756160736, 'timestamp': '2025-10-01 04:30:21.372976', 'step': 10002, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:21.427447', 'step': 10002, 'epoch': 2} {'type': 'loss', 'content': 0.08442458510398865, 'timestamp': '2025-10-01 04:30:21.429547', 'step': 10003, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:21.482698', 'step': 10003, 'epoch': 2} {'type': 'loss', 'content': 0.14253689348697662, 'timestamp': '2025-10-01 04:30:21.488597', 'step': 10004, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:21.541447', 'step': 10004, 'epoch': 2} {'type': 'loss', 'content': 0.13215501606464386, 'timestamp': '2025-10-01 04:30:21.544480', 'step': 10005, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:21.607190', 'step': 10005, 'epoch': 2} {'type': 'loss', 'content': 0.11548831313848495, 'timestamp': '2025-10-01 04:30:21.609262', 'step': 10006, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:21.663661', 'step': 10006, 'epoch': 2} {'type': 'loss', 'content': 0.15721102058887482, 'timestamp': '2025-10-01 04:30:21.665927', 'step': 10007, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:21.719345', 'step': 10007, 'epoch': 2} {'type': 'loss', 'content': 0.23713813722133636, 'timestamp': '2025-10-01 04:30:21.725306', 'step': 10008, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:21.778524', 'step': 10008, 'epoch': 2} {'type': 'loss', 'content': 0.14165586233139038, 'timestamp': '2025-10-01 04:30:21.780838', 'step': 10009, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:21.834101', 'step': 10009, 'epoch': 2} {'type': 'loss', 'content': 0.09116746485233307, 'timestamp': '2025-10-01 04:30:21.836219', 'step': 10010, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:21.892029', 'step': 10010, 'epoch': 2} {'type': 'loss', 'content': 0.15853096544742584, 'timestamp': '2025-10-01 04:30:21.894180', 'step': 10011, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:21.946943', 'step': 10011, 'epoch': 2} {'type': 'loss', 'content': 0.07607328146696091, 'timestamp': '2025-10-01 04:30:21.952873', 'step': 10012, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:22.005815', 'step': 10012, 'epoch': 2} {'type': 'loss', 'content': 0.17295534908771515, 'timestamp': '2025-10-01 04:30:22.007863', 'step': 10013, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:30:22.062608', 'step': 10013, 'epoch': 2} {'type': 'loss', 'content': 0.13783098757266998, 'timestamp': '2025-10-01 04:30:22.064839', 'step': 10014, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:22.119482', 'step': 10014, 'epoch': 2} {'type': 'loss', 'content': 0.13831627368927002, 'timestamp': '2025-10-01 04:30:22.121837', 'step': 10015, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:22.182195', 'step': 10015, 'epoch': 2} {'type': 'loss', 'content': 0.11754914373159409, 'timestamp': '2025-10-01 04:30:22.188419', 'step': 10016, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:22.241214', 'step': 10016, 'epoch': 2} {'type': 'loss', 'content': 0.08870846778154373, 'timestamp': '2025-10-01 04:30:22.243766', 'step': 10017, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:30:22.297351', 'step': 10017, 'epoch': 2} {'type': 'loss', 'content': 0.10632279515266418, 'timestamp': '2025-10-01 04:30:22.303447', 'step': 10018, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:22.357797', 'step': 10018, 'epoch': 2} {'type': 'loss', 'content': 0.07841247320175171, 'timestamp': '2025-10-01 04:30:22.362276', 'step': 10019, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:22.417004', 'step': 10019, 'epoch': 2} {'type': 'loss', 'content': 0.1400475651025772, 'timestamp': '2025-10-01 04:30:22.422590', 'step': 10020, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:22.483563', 'step': 10020, 'epoch': 2} {'type': 'loss', 'content': 0.08753900229930878, 'timestamp': '2025-10-01 04:30:22.486422', 'step': 10021, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:30:22.553753', 'step': 10021, 'epoch': 2} {'type': 'loss', 'content': 0.04786864295601845, 'timestamp': '2025-10-01 04:30:22.556828', 'step': 10022, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:22.610033', 'step': 10022, 'epoch': 2} {'type': 'loss', 'content': 0.08425480872392654, 'timestamp': '2025-10-01 04:30:22.612299', 'step': 10023, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:30:22.665872', 'step': 10023, 'epoch': 2} {'type': 'loss', 'content': 0.0302659273147583, 'timestamp': '2025-10-01 04:30:22.671649', 'step': 10024, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:22.729001', 'step': 10024, 'epoch': 2} {'type': 'loss', 'content': 0.11985602974891663, 'timestamp': '2025-10-01 04:30:22.732840', 'step': 10025, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:22.792273', 'step': 10025, 'epoch': 2} {'type': 'loss', 'content': 0.08237206935882568, 'timestamp': '2025-10-01 04:30:22.794341', 'step': 10026, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:22.847874', 'step': 10026, 'epoch': 2} {'type': 'loss', 'content': 0.13219422101974487, 'timestamp': '2025-10-01 04:30:22.866414', 'step': 10027, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:22.925752', 'step': 10027, 'epoch': 2} {'type': 'loss', 'content': 0.13846515119075775, 'timestamp': '2025-10-01 04:30:22.931546', 'step': 10028, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:22.984360', 'step': 10028, 'epoch': 2} {'type': 'loss', 'content': 0.08031873404979706, 'timestamp': '2025-10-01 04:30:22.986680', 'step': 10029, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:23.040644', 'step': 10029, 'epoch': 2} {'type': 'loss', 'content': 0.12359853833913803, 'timestamp': '2025-10-01 04:30:23.042813', 'step': 10030, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:23.100296', 'step': 10030, 'epoch': 2} {'type': 'loss', 'content': 0.19009655714035034, 'timestamp': '2025-10-01 04:30:23.108050', 'step': 10031, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:23.162246', 'step': 10031, 'epoch': 2} {'type': 'loss', 'content': 0.11157043278217316, 'timestamp': '2025-10-01 04:30:23.168124', 'step': 10032, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:23.227288', 'step': 10032, 'epoch': 2} {'type': 'loss', 'content': 0.08932415395975113, 'timestamp': '2025-10-01 04:30:23.229769', 'step': 10033, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:23.283911', 'step': 10033, 'epoch': 2} {'type': 'loss', 'content': 0.0850321426987648, 'timestamp': '2025-10-01 04:30:23.286374', 'step': 10034, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:30:23.346023', 'step': 10034, 'epoch': 2} {'type': 'loss', 'content': 0.09394282102584839, 'timestamp': '2025-10-01 04:30:23.348467', 'step': 10035, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:23.402166', 'step': 10035, 'epoch': 2} {'type': 'loss', 'content': 0.07187902927398682, 'timestamp': '2025-10-01 04:30:23.416765', 'step': 10036, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:23.478197', 'step': 10036, 'epoch': 2} {'type': 'loss', 'content': 0.10330627113580704, 'timestamp': '2025-10-01 04:30:23.484893', 'step': 10037, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:30:23.539815', 'step': 10037, 'epoch': 2} {'type': 'loss', 'content': 0.16428254544734955, 'timestamp': '2025-10-01 04:30:23.542071', 'step': 10038, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:23.601021', 'step': 10038, 'epoch': 2} {'type': 'loss', 'content': 0.1412399560213089, 'timestamp': '2025-10-01 04:30:23.603242', 'step': 10039, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:23.660178', 'step': 10039, 'epoch': 2} {'type': 'loss', 'content': 0.0582093708217144, 'timestamp': '2025-10-01 04:30:23.666225', 'step': 10040, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:23.719504', 'step': 10040, 'epoch': 2} {'type': 'loss', 'content': 0.13089396059513092, 'timestamp': '2025-10-01 04:30:23.722140', 'step': 10041, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:23.788995', 'step': 10041, 'epoch': 2} {'type': 'loss', 'content': 0.17538827657699585, 'timestamp': '2025-10-01 04:30:23.791725', 'step': 10042, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:23.846174', 'step': 10042, 'epoch': 2} {'type': 'loss', 'content': 0.15420016646385193, 'timestamp': '2025-10-01 04:30:23.851011', 'step': 10043, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:23.922529', 'step': 10043, 'epoch': 2} {'type': 'loss', 'content': 0.1580440253019333, 'timestamp': '2025-10-01 04:30:23.929004', 'step': 10044, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:23.987788', 'step': 10044, 'epoch': 2} {'type': 'loss', 'content': 0.1284286081790924, 'timestamp': '2025-10-01 04:30:23.990479', 'step': 10045, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:24.044560', 'step': 10045, 'epoch': 2} {'type': 'loss', 'content': 0.12144735455513, 'timestamp': '2025-10-01 04:30:24.046859', 'step': 10046, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:24.102098', 'step': 10046, 'epoch': 2} {'type': 'loss', 'content': 0.15482647716999054, 'timestamp': '2025-10-01 04:30:24.104443', 'step': 10047, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:24.167831', 'step': 10047, 'epoch': 2} {'type': 'loss', 'content': 0.1294192373752594, 'timestamp': '2025-10-01 04:30:24.174152', 'step': 10048, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:24.232159', 'step': 10048, 'epoch': 2} {'type': 'loss', 'content': 0.11749511957168579, 'timestamp': '2025-10-01 04:30:24.234483', 'step': 10049, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:24.288811', 'step': 10049, 'epoch': 2} {'type': 'loss', 'content': 0.0919872596859932, 'timestamp': '2025-10-01 04:30:24.291640', 'step': 10050, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:24.346008', 'step': 10050, 'epoch': 2} {'type': 'loss', 'content': 0.13117966055870056, 'timestamp': '2025-10-01 04:30:24.348948', 'step': 10051, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:24.403189', 'step': 10051, 'epoch': 2} {'type': 'loss', 'content': 0.0903034657239914, 'timestamp': '2025-10-01 04:30:24.408639', 'step': 10052, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:24.461021', 'step': 10052, 'epoch': 2} {'type': 'loss', 'content': 0.14135770499706268, 'timestamp': '2025-10-01 04:30:24.462952', 'step': 10053, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:24.516375', 'step': 10053, 'epoch': 2} {'type': 'loss', 'content': 0.1303255409002304, 'timestamp': '2025-10-01 04:30:24.518134', 'step': 10054, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:24.571164', 'step': 10054, 'epoch': 2} {'type': 'loss', 'content': 0.046773895621299744, 'timestamp': '2025-10-01 04:30:24.573501', 'step': 10055, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:24.626573', 'step': 10055, 'epoch': 2} {'type': 'loss', 'content': 0.1819504350423813, 'timestamp': '2025-10-01 04:30:24.632227', 'step': 10056, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:24.684549', 'step': 10056, 'epoch': 2} {'type': 'loss', 'content': 0.054783426225185394, 'timestamp': '2025-10-01 04:30:24.686658', 'step': 10057, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:24.741841', 'step': 10057, 'epoch': 2} {'type': 'loss', 'content': 0.07743538916110992, 'timestamp': '2025-10-01 04:30:24.744055', 'step': 10058, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:24.796927', 'step': 10058, 'epoch': 2} {'type': 'loss', 'content': 0.10883596539497375, 'timestamp': '2025-10-01 04:30:24.798928', 'step': 10059, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:24.851243', 'step': 10059, 'epoch': 2} {'type': 'loss', 'content': 0.1076829582452774, 'timestamp': '2025-10-01 04:30:24.867827', 'step': 10060, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:24.921804', 'step': 10060, 'epoch': 2} {'type': 'loss', 'content': 0.09501456469297409, 'timestamp': '2025-10-01 04:30:24.923928', 'step': 10061, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:24.976218', 'step': 10061, 'epoch': 2} {'type': 'loss', 'content': 0.10027709603309631, 'timestamp': '2025-10-01 04:30:24.978619', 'step': 10062, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:25.031915', 'step': 10062, 'epoch': 2} {'type': 'loss', 'content': 0.12008071690797806, 'timestamp': '2025-10-01 04:30:25.034515', 'step': 10063, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:25.088456', 'step': 10063, 'epoch': 2} {'type': 'loss', 'content': 0.08382833003997803, 'timestamp': '2025-10-01 04:30:25.094194', 'step': 10064, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:30:25.147559', 'step': 10064, 'epoch': 2} {'type': 'loss', 'content': 0.10315973311662674, 'timestamp': '2025-10-01 04:30:25.150231', 'step': 10065, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:25.204182', 'step': 10065, 'epoch': 2} {'type': 'loss', 'content': 0.17241057753562927, 'timestamp': '2025-10-01 04:30:25.206684', 'step': 10066, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:25.260232', 'step': 10066, 'epoch': 2} {'type': 'loss', 'content': 0.09532628208398819, 'timestamp': '2025-10-01 04:30:25.262064', 'step': 10067, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:25.314949', 'step': 10067, 'epoch': 2} {'type': 'loss', 'content': 0.05585745349526405, 'timestamp': '2025-10-01 04:30:25.321132', 'step': 10068, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:25.373637', 'step': 10068, 'epoch': 2} {'type': 'loss', 'content': 0.18750759959220886, 'timestamp': '2025-10-01 04:30:25.375826', 'step': 10069, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:25.429498', 'step': 10069, 'epoch': 2} {'type': 'loss', 'content': 0.15669742226600647, 'timestamp': '2025-10-01 04:30:25.433399', 'step': 10070, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:25.487061', 'step': 10070, 'epoch': 2} {'type': 'loss', 'content': 0.10256771743297577, 'timestamp': '2025-10-01 04:30:25.502128', 'step': 10071, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:25.556159', 'step': 10071, 'epoch': 2} {'type': 'loss', 'content': 0.12438715249300003, 'timestamp': '2025-10-01 04:30:25.561935', 'step': 10072, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:25.615930', 'step': 10072, 'epoch': 2} {'type': 'loss', 'content': 0.142070472240448, 'timestamp': '2025-10-01 04:30:25.617907', 'step': 10073, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:25.670620', 'step': 10073, 'epoch': 2} {'type': 'loss', 'content': 0.09907425940036774, 'timestamp': '2025-10-01 04:30:25.674240', 'step': 10074, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:25.727961', 'step': 10074, 'epoch': 2} {'type': 'loss', 'content': 0.10840843617916107, 'timestamp': '2025-10-01 04:30:25.729686', 'step': 10075, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:25.783080', 'step': 10075, 'epoch': 2} {'type': 'loss', 'content': 0.14973515272140503, 'timestamp': '2025-10-01 04:30:25.788797', 'step': 10076, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:25.842338', 'step': 10076, 'epoch': 2} {'type': 'loss', 'content': 0.2575741708278656, 'timestamp': '2025-10-01 04:30:25.844589', 'step': 10077, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:25.897608', 'step': 10077, 'epoch': 2} {'type': 'loss', 'content': 0.08925352245569229, 'timestamp': '2025-10-01 04:30:25.901063', 'step': 10078, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:25.964932', 'step': 10078, 'epoch': 2} {'type': 'loss', 'content': 0.15146055817604065, 'timestamp': '2025-10-01 04:30:25.967396', 'step': 10079, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:30:26.027001', 'step': 10079, 'epoch': 2} {'type': 'loss', 'content': 0.11704619973897934, 'timestamp': '2025-10-01 04:30:26.032684', 'step': 10080, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:26.087413', 'step': 10080, 'epoch': 2} {'type': 'loss', 'content': 0.07128129154443741, 'timestamp': '2025-10-01 04:30:26.089282', 'step': 10081, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:26.142414', 'step': 10081, 'epoch': 2} {'type': 'loss', 'content': 0.06854575872421265, 'timestamp': '2025-10-01 04:30:26.144347', 'step': 10082, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:26.198352', 'step': 10082, 'epoch': 2} {'type': 'loss', 'content': 0.12475527822971344, 'timestamp': '2025-10-01 04:30:26.200670', 'step': 10083, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:26.259455', 'step': 10083, 'epoch': 2} {'type': 'loss', 'content': 0.1642979085445404, 'timestamp': '2025-10-01 04:30:26.265284', 'step': 10084, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:26.322126', 'step': 10084, 'epoch': 2} {'type': 'loss', 'content': 0.09718752652406693, 'timestamp': '2025-10-01 04:30:26.324104', 'step': 10085, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:26.379858', 'step': 10085, 'epoch': 2} {'type': 'loss', 'content': 0.09713283181190491, 'timestamp': '2025-10-01 04:30:26.382086', 'step': 10086, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:26.435734', 'step': 10086, 'epoch': 2} {'type': 'loss', 'content': 0.11882948875427246, 'timestamp': '2025-10-01 04:30:26.444463', 'step': 10087, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:26.499764', 'step': 10087, 'epoch': 2} {'type': 'loss', 'content': 0.09676341712474823, 'timestamp': '2025-10-01 04:30:26.505458', 'step': 10088, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:30:26.565175', 'step': 10088, 'epoch': 2} {'type': 'loss', 'content': 0.10335720330476761, 'timestamp': '2025-10-01 04:30:26.567269', 'step': 10089, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:26.620941', 'step': 10089, 'epoch': 2} {'type': 'loss', 'content': 0.0671505481004715, 'timestamp': '2025-10-01 04:30:26.623481', 'step': 10090, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:26.677447', 'step': 10090, 'epoch': 2} {'type': 'loss', 'content': 0.18109770119190216, 'timestamp': '2025-10-01 04:30:26.679668', 'step': 10091, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:26.736240', 'step': 10091, 'epoch': 2} {'type': 'loss', 'content': 0.09651008248329163, 'timestamp': '2025-10-01 04:30:26.751978', 'step': 10092, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:26.805171', 'step': 10092, 'epoch': 2} {'type': 'loss', 'content': 0.18432120978832245, 'timestamp': '2025-10-01 04:30:26.807284', 'step': 10093, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:26.860806', 'step': 10093, 'epoch': 2} {'type': 'loss', 'content': 0.13890624046325684, 'timestamp': '2025-10-01 04:30:26.862834', 'step': 10094, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:26.916681', 'step': 10094, 'epoch': 2} {'type': 'loss', 'content': 0.1109476312994957, 'timestamp': '2025-10-01 04:30:26.918762', 'step': 10095, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:26.972184', 'step': 10095, 'epoch': 2} {'type': 'loss', 'content': 0.12441786378622055, 'timestamp': '2025-10-01 04:30:26.978170', 'step': 10096, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:27.031018', 'step': 10096, 'epoch': 2} {'type': 'loss', 'content': 0.08402209728956223, 'timestamp': '2025-10-01 04:30:27.033879', 'step': 10097, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:27.086806', 'step': 10097, 'epoch': 2} {'type': 'loss', 'content': 0.09902585297822952, 'timestamp': '2025-10-01 04:30:27.088936', 'step': 10098, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:27.142887', 'step': 10098, 'epoch': 2} {'type': 'loss', 'content': 0.13175269961357117, 'timestamp': '2025-10-01 04:30:27.145257', 'step': 10099, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:27.198416', 'step': 10099, 'epoch': 2} {'type': 'loss', 'content': 0.1023683175444603, 'timestamp': '2025-10-01 04:30:27.204615', 'step': 10100, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:27.257629', 'step': 10100, 'epoch': 2} {'type': 'loss', 'content': 0.20249612629413605, 'timestamp': '2025-10-01 04:30:27.259657', 'step': 10101, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:27.312323', 'step': 10101, 'epoch': 2} {'type': 'loss', 'content': 0.12023811787366867, 'timestamp': '2025-10-01 04:30:27.314905', 'step': 10102, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:27.373217', 'step': 10102, 'epoch': 2} {'type': 'loss', 'content': 0.10511088371276855, 'timestamp': '2025-10-01 04:30:27.376116', 'step': 10103, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:27.429752', 'step': 10103, 'epoch': 2} {'type': 'loss', 'content': 0.05796639248728752, 'timestamp': '2025-10-01 04:30:27.435844', 'step': 10104, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:27.488527', 'step': 10104, 'epoch': 2} {'type': 'loss', 'content': 0.0958210825920105, 'timestamp': '2025-10-01 04:30:27.491366', 'step': 10105, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:27.544421', 'step': 10105, 'epoch': 2} {'type': 'loss', 'content': 0.09186200797557831, 'timestamp': '2025-10-01 04:30:27.546618', 'step': 10106, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:27.600625', 'step': 10106, 'epoch': 2} {'type': 'loss', 'content': 0.11357831209897995, 'timestamp': '2025-10-01 04:30:27.603035', 'step': 10107, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:27.656363', 'step': 10107, 'epoch': 2} {'type': 'loss', 'content': 0.0977419838309288, 'timestamp': '2025-10-01 04:30:27.662146', 'step': 10108, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:27.715341', 'step': 10108, 'epoch': 2} {'type': 'loss', 'content': 0.09306756407022476, 'timestamp': '2025-10-01 04:30:27.717603', 'step': 10109, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:27.771252', 'step': 10109, 'epoch': 2} {'type': 'loss', 'content': 0.15663036704063416, 'timestamp': '2025-10-01 04:30:27.773546', 'step': 10110, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:27.827208', 'step': 10110, 'epoch': 2} {'type': 'loss', 'content': 0.1503164917230606, 'timestamp': '2025-10-01 04:30:27.829496', 'step': 10111, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:27.882658', 'step': 10111, 'epoch': 2} {'type': 'loss', 'content': 0.12320080399513245, 'timestamp': '2025-10-01 04:30:27.898111', 'step': 10112, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:27.950751', 'step': 10112, 'epoch': 2} {'type': 'loss', 'content': 0.12355141341686249, 'timestamp': '2025-10-01 04:30:27.953148', 'step': 10113, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:28.008252', 'step': 10113, 'epoch': 2} {'type': 'loss', 'content': 0.04507249966263771, 'timestamp': '2025-10-01 04:30:28.010666', 'step': 10114, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:28.065429', 'step': 10114, 'epoch': 2} {'type': 'loss', 'content': 0.1147627979516983, 'timestamp': '2025-10-01 04:30:28.067865', 'step': 10115, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:28.121496', 'step': 10115, 'epoch': 2} {'type': 'loss', 'content': 0.09848051518201828, 'timestamp': '2025-10-01 04:30:28.127698', 'step': 10116, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:30:28.180893', 'step': 10116, 'epoch': 2} {'type': 'loss', 'content': 0.08764439821243286, 'timestamp': '2025-10-01 04:30:28.183050', 'step': 10117, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:28.236111', 'step': 10117, 'epoch': 2} {'type': 'loss', 'content': 0.08153180032968521, 'timestamp': '2025-10-01 04:30:28.238292', 'step': 10118, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:28.292223', 'step': 10118, 'epoch': 2} {'type': 'loss', 'content': 0.06906033307313919, 'timestamp': '2025-10-01 04:30:28.294485', 'step': 10119, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:28.347857', 'step': 10119, 'epoch': 2} {'type': 'loss', 'content': 0.19637775421142578, 'timestamp': '2025-10-01 04:30:28.353744', 'step': 10120, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:28.407155', 'step': 10120, 'epoch': 2} {'type': 'loss', 'content': 0.2416868954896927, 'timestamp': '2025-10-01 04:30:28.409063', 'step': 10121, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:28.462754', 'step': 10121, 'epoch': 2} {'type': 'loss', 'content': 0.12144610285758972, 'timestamp': '2025-10-01 04:30:28.465126', 'step': 10122, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:28.520997', 'step': 10122, 'epoch': 2} {'type': 'loss', 'content': 0.1190468966960907, 'timestamp': '2025-10-01 04:30:28.524274', 'step': 10123, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:28.578877', 'step': 10123, 'epoch': 2} {'type': 'loss', 'content': 0.12054569274187088, 'timestamp': '2025-10-01 04:30:28.590062', 'step': 10124, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:28.648030', 'step': 10124, 'epoch': 2} {'type': 'loss', 'content': 0.15139006078243256, 'timestamp': '2025-10-01 04:30:28.651025', 'step': 10125, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:28.704921', 'step': 10125, 'epoch': 2} {'type': 'loss', 'content': 0.10053251683712006, 'timestamp': '2025-10-01 04:30:28.707177', 'step': 10126, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:28.762148', 'step': 10126, 'epoch': 2} {'type': 'loss', 'content': 0.14357459545135498, 'timestamp': '2025-10-01 04:30:28.764536', 'step': 10127, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:28.819334', 'step': 10127, 'epoch': 2} {'type': 'loss', 'content': 0.1375831663608551, 'timestamp': '2025-10-01 04:30:28.839808', 'step': 10128, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:28.906042', 'step': 10128, 'epoch': 2} {'type': 'loss', 'content': 0.15504607558250427, 'timestamp': '2025-10-01 04:30:28.908582', 'step': 10129, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:28.981051', 'step': 10129, 'epoch': 2} {'type': 'loss', 'content': 0.10761450976133347, 'timestamp': '2025-10-01 04:30:28.988822', 'step': 10130, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:29.065657', 'step': 10130, 'epoch': 2} {'type': 'loss', 'content': 0.12525850534439087, 'timestamp': '2025-10-01 04:30:29.079602', 'step': 10131, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:29.136436', 'step': 10131, 'epoch': 2} {'type': 'loss', 'content': 0.08394555747509003, 'timestamp': '2025-10-01 04:30:29.149532', 'step': 10132, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:29.214713', 'step': 10132, 'epoch': 2} {'type': 'loss', 'content': 0.09512065351009369, 'timestamp': '2025-10-01 04:30:29.217170', 'step': 10133, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:29.286799', 'step': 10133, 'epoch': 2} {'type': 'loss', 'content': 0.11624466627836227, 'timestamp': '2025-10-01 04:30:29.291295', 'step': 10134, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:29.365178', 'step': 10134, 'epoch': 2} {'type': 'loss', 'content': 0.1292765885591507, 'timestamp': '2025-10-01 04:30:29.367694', 'step': 10135, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:30:29.423339', 'step': 10135, 'epoch': 2} {'type': 'loss', 'content': 0.06877826154232025, 'timestamp': '2025-10-01 04:30:29.436251', 'step': 10136, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:29.490265', 'step': 10136, 'epoch': 2} {'type': 'loss', 'content': 0.11795254051685333, 'timestamp': '2025-10-01 04:30:29.509025', 'step': 10137, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:29.594880', 'step': 10137, 'epoch': 2} {'type': 'loss', 'content': 0.10090839862823486, 'timestamp': '2025-10-01 04:30:29.598794', 'step': 10138, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:29.673221', 'step': 10138, 'epoch': 2} {'type': 'loss', 'content': 0.0713188424706459, 'timestamp': '2025-10-01 04:30:29.676202', 'step': 10139, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:29.736226', 'step': 10139, 'epoch': 2} {'type': 'loss', 'content': 0.15282568335533142, 'timestamp': '2025-10-01 04:30:29.743507', 'step': 10140, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:29.810222', 'step': 10140, 'epoch': 2} {'type': 'loss', 'content': 0.06326790899038315, 'timestamp': '2025-10-01 04:30:29.813861', 'step': 10141, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:29.884314', 'step': 10141, 'epoch': 2} {'type': 'loss', 'content': 0.12197481840848923, 'timestamp': '2025-10-01 04:30:29.891416', 'step': 10142, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:29.959086', 'step': 10142, 'epoch': 2} {'type': 'loss', 'content': 0.1555405855178833, 'timestamp': '2025-10-01 04:30:29.961656', 'step': 10143, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:30:30.024559', 'step': 10143, 'epoch': 2} {'type': 'loss', 'content': 0.10425950586795807, 'timestamp': '2025-10-01 04:30:30.030742', 'step': 10144, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:30.090410', 'step': 10144, 'epoch': 2} {'type': 'loss', 'content': 0.04603489115834236, 'timestamp': '2025-10-01 04:30:30.092437', 'step': 10145, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:30.146371', 'step': 10145, 'epoch': 2} {'type': 'loss', 'content': 0.10585682839155197, 'timestamp': '2025-10-01 04:30:30.151497', 'step': 10146, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:30.204739', 'step': 10146, 'epoch': 2} {'type': 'loss', 'content': 0.12177704274654388, 'timestamp': '2025-10-01 04:30:30.206948', 'step': 10147, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:30.261841', 'step': 10147, 'epoch': 2} {'type': 'loss', 'content': 0.11378027498722076, 'timestamp': '2025-10-01 04:30:30.268460', 'step': 10148, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:30.333332', 'step': 10148, 'epoch': 2} {'type': 'loss', 'content': 0.11465565115213394, 'timestamp': '2025-10-01 04:30:30.335396', 'step': 10149, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:30.389183', 'step': 10149, 'epoch': 2} {'type': 'loss', 'content': 0.13437339663505554, 'timestamp': '2025-10-01 04:30:30.399593', 'step': 10150, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:30.454440', 'step': 10150, 'epoch': 2} {'type': 'loss', 'content': 0.17992430925369263, 'timestamp': '2025-10-01 04:30:30.456795', 'step': 10151, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:30.510643', 'step': 10151, 'epoch': 2} {'type': 'loss', 'content': 0.18608082830905914, 'timestamp': '2025-10-01 04:30:30.516842', 'step': 10152, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:30.569833', 'step': 10152, 'epoch': 2} {'type': 'loss', 'content': 0.0881812795996666, 'timestamp': '2025-10-01 04:30:30.572071', 'step': 10153, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:30.626046', 'step': 10153, 'epoch': 2} {'type': 'loss', 'content': 0.12443175166845322, 'timestamp': '2025-10-01 04:30:30.628171', 'step': 10154, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:30.681403', 'step': 10154, 'epoch': 2} {'type': 'loss', 'content': 0.12971289455890656, 'timestamp': '2025-10-01 04:30:30.683666', 'step': 10155, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:30.737421', 'step': 10155, 'epoch': 2} {'type': 'loss', 'content': 0.1128329411149025, 'timestamp': '2025-10-01 04:30:30.743707', 'step': 10156, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:30.797154', 'step': 10156, 'epoch': 2} {'type': 'loss', 'content': 0.10572782903909683, 'timestamp': '2025-10-01 04:30:30.799196', 'step': 10157, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:30.852754', 'step': 10157, 'epoch': 2} {'type': 'loss', 'content': 0.13216567039489746, 'timestamp': '2025-10-01 04:30:30.854952', 'step': 10158, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:30:30.908371', 'step': 10158, 'epoch': 2} {'type': 'loss', 'content': 0.16034558415412903, 'timestamp': '2025-10-01 04:30:30.910519', 'step': 10159, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:30.963801', 'step': 10159, 'epoch': 2} {'type': 'loss', 'content': 0.1062529981136322, 'timestamp': '2025-10-01 04:30:30.970038', 'step': 10160, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:31.023474', 'step': 10160, 'epoch': 2} {'type': 'loss', 'content': 0.06962607055902481, 'timestamp': '2025-10-01 04:30:31.025877', 'step': 10161, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:31.085975', 'step': 10161, 'epoch': 2} {'type': 'loss', 'content': 0.12917783856391907, 'timestamp': '2025-10-01 04:30:31.088350', 'step': 10162, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:31.148909', 'step': 10162, 'epoch': 2} {'type': 'loss', 'content': 0.22768819332122803, 'timestamp': '2025-10-01 04:30:31.151816', 'step': 10163, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:31.211921', 'step': 10163, 'epoch': 2} {'type': 'loss', 'content': 0.14883558452129364, 'timestamp': '2025-10-01 04:30:31.219102', 'step': 10164, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:31.279247', 'step': 10164, 'epoch': 2} {'type': 'loss', 'content': 0.17077645659446716, 'timestamp': '2025-10-01 04:30:31.282226', 'step': 10165, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:31.344749', 'step': 10165, 'epoch': 2} {'type': 'loss', 'content': 0.15842662751674652, 'timestamp': '2025-10-01 04:30:31.347713', 'step': 10166, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:31.409667', 'step': 10166, 'epoch': 2} {'type': 'loss', 'content': 0.2111114114522934, 'timestamp': '2025-10-01 04:30:31.412152', 'step': 10167, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:30:31.474162', 'step': 10167, 'epoch': 2} {'type': 'loss', 'content': 0.13539324700832367, 'timestamp': '2025-10-01 04:30:31.497033', 'step': 10168, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:31.571503', 'step': 10168, 'epoch': 2} {'type': 'loss', 'content': 0.1692640483379364, 'timestamp': '2025-10-01 04:30:31.574875', 'step': 10169, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:31.635389', 'step': 10169, 'epoch': 2} {'type': 'loss', 'content': 0.22015702724456787, 'timestamp': '2025-10-01 04:30:31.637943', 'step': 10170, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:31.701027', 'step': 10170, 'epoch': 2} {'type': 'loss', 'content': 0.14868073165416718, 'timestamp': '2025-10-01 04:30:31.703569', 'step': 10171, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:31.762199', 'step': 10171, 'epoch': 2} {'type': 'loss', 'content': 0.16402631998062134, 'timestamp': '2025-10-01 04:30:31.769537', 'step': 10172, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:31.825651', 'step': 10172, 'epoch': 2} {'type': 'loss', 'content': 0.18191516399383545, 'timestamp': '2025-10-01 04:30:31.828445', 'step': 10173, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:31.882781', 'step': 10173, 'epoch': 2} {'type': 'loss', 'content': 0.09479578584432602, 'timestamp': '2025-10-01 04:30:31.904902', 'step': 10174, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:31.967556', 'step': 10174, 'epoch': 2} {'type': 'loss', 'content': 0.18377657234668732, 'timestamp': '2025-10-01 04:30:31.970241', 'step': 10175, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:32.024539', 'step': 10175, 'epoch': 2} {'type': 'loss', 'content': 0.09957733750343323, 'timestamp': '2025-10-01 04:30:32.031647', 'step': 10176, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:32.086840', 'step': 10176, 'epoch': 2} {'type': 'loss', 'content': 0.10039161145687103, 'timestamp': '2025-10-01 04:30:32.089452', 'step': 10177, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:32.147690', 'step': 10177, 'epoch': 2} {'type': 'loss', 'content': 0.14926432073116302, 'timestamp': '2025-10-01 04:30:32.150337', 'step': 10178, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:32.206606', 'step': 10178, 'epoch': 2} {'type': 'loss', 'content': 0.08574631810188293, 'timestamp': '2025-10-01 04:30:32.209190', 'step': 10179, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:32.267083', 'step': 10179, 'epoch': 2} {'type': 'loss', 'content': 0.2561010718345642, 'timestamp': '2025-10-01 04:30:32.275345', 'step': 10180, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:30:32.331260', 'step': 10180, 'epoch': 2} {'type': 'loss', 'content': 0.1894717961549759, 'timestamp': '2025-10-01 04:30:32.334001', 'step': 10181, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:32.404715', 'step': 10181, 'epoch': 2} {'type': 'loss', 'content': 0.11686313897371292, 'timestamp': '2025-10-01 04:30:32.407141', 'step': 10182, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:32.463412', 'step': 10182, 'epoch': 2} {'type': 'loss', 'content': 0.13982050120830536, 'timestamp': '2025-10-01 04:30:32.465743', 'step': 10183, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:32.533001', 'step': 10183, 'epoch': 2} {'type': 'loss', 'content': 0.13723964989185333, 'timestamp': '2025-10-01 04:30:32.539542', 'step': 10184, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:32.594868', 'step': 10184, 'epoch': 2} {'type': 'loss', 'content': 0.15174734592437744, 'timestamp': '2025-10-01 04:30:32.597012', 'step': 10185, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:32.652340', 'step': 10185, 'epoch': 2} {'type': 'loss', 'content': 0.08178520947694778, 'timestamp': '2025-10-01 04:30:32.655287', 'step': 10186, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:32.710838', 'step': 10186, 'epoch': 2} {'type': 'loss', 'content': 0.18592120707035065, 'timestamp': '2025-10-01 04:30:32.713077', 'step': 10187, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:32.768007', 'step': 10187, 'epoch': 2} {'type': 'loss', 'content': 0.18033282458782196, 'timestamp': '2025-10-01 04:30:32.774532', 'step': 10188, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:32.828019', 'step': 10188, 'epoch': 2} {'type': 'loss', 'content': 0.09173320978879929, 'timestamp': '2025-10-01 04:30:32.830132', 'step': 10189, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:32.905508', 'step': 10189, 'epoch': 2} {'type': 'loss', 'content': 0.0877029076218605, 'timestamp': '2025-10-01 04:30:32.907505', 'step': 10190, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:32.971672', 'step': 10190, 'epoch': 2} {'type': 'loss', 'content': 0.24934706091880798, 'timestamp': '2025-10-01 04:30:32.974431', 'step': 10191, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:33.035416', 'step': 10191, 'epoch': 2} {'type': 'loss', 'content': 0.12955793738365173, 'timestamp': '2025-10-01 04:30:33.042677', 'step': 10192, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:33.100080', 'step': 10192, 'epoch': 2} {'type': 'loss', 'content': 0.10512319207191467, 'timestamp': '2025-10-01 04:30:33.103703', 'step': 10193, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:33.158626', 'step': 10193, 'epoch': 2} {'type': 'loss', 'content': 0.13022325932979584, 'timestamp': '2025-10-01 04:30:33.161019', 'step': 10194, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:33.214924', 'step': 10194, 'epoch': 2} {'type': 'loss', 'content': 0.16598385572433472, 'timestamp': '2025-10-01 04:30:33.217516', 'step': 10195, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:33.270137', 'step': 10195, 'epoch': 2} {'type': 'loss', 'content': 0.15175844728946686, 'timestamp': '2025-10-01 04:30:33.277420', 'step': 10196, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:30:33.330681', 'step': 10196, 'epoch': 2} {'type': 'loss', 'content': 0.09432515501976013, 'timestamp': '2025-10-01 04:30:33.332874', 'step': 10197, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:33.385086', 'step': 10197, 'epoch': 2} {'type': 'loss', 'content': 0.1590525358915329, 'timestamp': '2025-10-01 04:30:33.387197', 'step': 10198, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:33.439619', 'step': 10198, 'epoch': 2} {'type': 'loss', 'content': 0.07586655765771866, 'timestamp': '2025-10-01 04:30:33.442925', 'step': 10199, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:33.497772', 'step': 10199, 'epoch': 2} {'type': 'loss', 'content': 0.13823620975017548, 'timestamp': '2025-10-01 04:30:33.504160', 'step': 10200, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:33.557340', 'step': 10200, 'epoch': 2} {'type': 'loss', 'content': 0.16622568666934967, 'timestamp': '2025-10-01 04:30:33.559310', 'step': 10201, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:33.612605', 'step': 10201, 'epoch': 2} {'type': 'loss', 'content': 0.07157725840806961, 'timestamp': '2025-10-01 04:30:33.615710', 'step': 10202, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:33.669605', 'step': 10202, 'epoch': 2} {'type': 'loss', 'content': 0.12035961449146271, 'timestamp': '2025-10-01 04:30:33.671932', 'step': 10203, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:30:33.725591', 'step': 10203, 'epoch': 2} {'type': 'loss', 'content': 0.11478414386510849, 'timestamp': '2025-10-01 04:30:33.732483', 'step': 10204, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:33.785925', 'step': 10204, 'epoch': 2} {'type': 'loss', 'content': 0.1668020337820053, 'timestamp': '2025-10-01 04:30:33.788497', 'step': 10205, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:33.841756', 'step': 10205, 'epoch': 2} {'type': 'loss', 'content': 0.1013173907995224, 'timestamp': '2025-10-01 04:30:33.843899', 'step': 10206, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:33.896877', 'step': 10206, 'epoch': 2} {'type': 'loss', 'content': 0.09695260971784592, 'timestamp': '2025-10-01 04:30:33.898955', 'step': 10207, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:33.951779', 'step': 10207, 'epoch': 2} {'type': 'loss', 'content': 0.09541427344083786, 'timestamp': '2025-10-01 04:30:33.958046', 'step': 10208, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-01 04:30:34.010770', 'step': 10208, 'epoch': 2} {'type': 'loss', 'content': 0.13411164283752441, 'timestamp': '2025-10-01 04:30:34.012967', 'step': 10209, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:34.065860', 'step': 10209, 'epoch': 2} {'type': 'loss', 'content': 0.09358500689268112, 'timestamp': '2025-10-01 04:30:34.068070', 'step': 10210, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:34.121483', 'step': 10210, 'epoch': 2} {'type': 'loss', 'content': 0.13617785274982452, 'timestamp': '2025-10-01 04:30:34.123444', 'step': 10211, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:34.176298', 'step': 10211, 'epoch': 2} {'type': 'loss', 'content': 0.08527188748121262, 'timestamp': '2025-10-01 04:30:34.182702', 'step': 10212, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:30:34.234871', 'step': 10212, 'epoch': 2} {'type': 'loss', 'content': 0.08953986316919327, 'timestamp': '2025-10-01 04:30:34.237822', 'step': 10213, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:34.290410', 'step': 10213, 'epoch': 2} {'type': 'loss', 'content': 0.20585525035858154, 'timestamp': '2025-10-01 04:30:34.293459', 'step': 10214, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:34.347681', 'step': 10214, 'epoch': 2} {'type': 'loss', 'content': 0.16561263799667358, 'timestamp': '2025-10-01 04:30:34.349876', 'step': 10215, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:34.404846', 'step': 10215, 'epoch': 2} {'type': 'loss', 'content': 0.09233366698026657, 'timestamp': '2025-10-01 04:30:34.410533', 'step': 10216, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:34.469670', 'step': 10216, 'epoch': 2} {'type': 'loss', 'content': 0.11609435081481934, 'timestamp': '2025-10-01 04:30:34.471626', 'step': 10217, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:34.524362', 'step': 10217, 'epoch': 2} {'type': 'loss', 'content': 0.18164949119091034, 'timestamp': '2025-10-01 04:30:34.526632', 'step': 10218, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:34.579996', 'step': 10218, 'epoch': 2} {'type': 'loss', 'content': 0.10270123928785324, 'timestamp': '2025-10-01 04:30:34.584417', 'step': 10219, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:34.643088', 'step': 10219, 'epoch': 2} {'type': 'loss', 'content': 0.08136338740587234, 'timestamp': '2025-10-01 04:30:34.648693', 'step': 10220, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:34.701436', 'step': 10220, 'epoch': 2} {'type': 'loss', 'content': 0.1402045041322708, 'timestamp': '2025-10-01 04:30:34.707987', 'step': 10221, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:34.760809', 'step': 10221, 'epoch': 2} {'type': 'loss', 'content': 0.0913999006152153, 'timestamp': '2025-10-01 04:30:34.763214', 'step': 10222, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:30:34.816001', 'step': 10222, 'epoch': 2} {'type': 'loss', 'content': 0.10329725593328476, 'timestamp': '2025-10-01 04:30:34.824032', 'step': 10223, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:34.876264', 'step': 10223, 'epoch': 2} {'type': 'loss', 'content': 0.1633034497499466, 'timestamp': '2025-10-01 04:30:34.881946', 'step': 10224, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:34.934351', 'step': 10224, 'epoch': 2} {'type': 'loss', 'content': 0.15398390591144562, 'timestamp': '2025-10-01 04:30:34.936419', 'step': 10225, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:30:34.989005', 'step': 10225, 'epoch': 2} {'type': 'loss', 'content': 0.1424858272075653, 'timestamp': '2025-10-01 04:30:34.991069', 'step': 10226, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:30:35.043762', 'step': 10226, 'epoch': 2} {'type': 'loss', 'content': 0.145652636885643, 'timestamp': '2025-10-01 04:30:35.045883', 'step': 10227, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:35.099321', 'step': 10227, 'epoch': 2} {'type': 'loss', 'content': 0.09774920344352722, 'timestamp': '2025-10-01 04:30:35.105201', 'step': 10228, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:35.157986', 'step': 10228, 'epoch': 2} {'type': 'loss', 'content': 0.10687524080276489, 'timestamp': '2025-10-01 04:30:35.160226', 'step': 10229, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:35.213025', 'step': 10229, 'epoch': 2} {'type': 'loss', 'content': 0.19045133888721466, 'timestamp': '2025-10-01 04:30:35.215057', 'step': 10230, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:35.268499', 'step': 10230, 'epoch': 2} {'type': 'loss', 'content': 0.07851964980363846, 'timestamp': '2025-10-01 04:30:35.270813', 'step': 10231, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:35.323717', 'step': 10231, 'epoch': 2} {'type': 'loss', 'content': 0.2072134166955948, 'timestamp': '2025-10-01 04:30:35.329379', 'step': 10232, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:35.381583', 'step': 10232, 'epoch': 2} {'type': 'loss', 'content': 0.062477827072143555, 'timestamp': '2025-10-01 04:30:35.383655', 'step': 10233, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:35.436545', 'step': 10233, 'epoch': 2} {'type': 'loss', 'content': 0.17549268901348114, 'timestamp': '2025-10-01 04:30:35.438640', 'step': 10234, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:30:35.492354', 'step': 10234, 'epoch': 2} {'type': 'loss', 'content': 0.13361667096614838, 'timestamp': '2025-10-01 04:30:35.501809', 'step': 10235, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:35.555077', 'step': 10235, 'epoch': 2} {'type': 'loss', 'content': 0.10129019618034363, 'timestamp': '2025-10-01 04:30:35.561507', 'step': 10236, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:35.615183', 'step': 10236, 'epoch': 2} {'type': 'loss', 'content': 0.12334910780191422, 'timestamp': '2025-10-01 04:30:35.617236', 'step': 10237, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:35.670655', 'step': 10237, 'epoch': 2} {'type': 'loss', 'content': 0.11713024228811264, 'timestamp': '2025-10-01 04:30:35.673063', 'step': 10238, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:35.730063', 'step': 10238, 'epoch': 2} {'type': 'loss', 'content': 0.08387662470340729, 'timestamp': '2025-10-01 04:30:35.732208', 'step': 10239, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:35.784869', 'step': 10239, 'epoch': 2} {'type': 'loss', 'content': 0.18405401706695557, 'timestamp': '2025-10-01 04:30:35.790772', 'step': 10240, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:35.843381', 'step': 10240, 'epoch': 2} {'type': 'loss', 'content': 0.10736749321222305, 'timestamp': '2025-10-01 04:30:35.845567', 'step': 10241, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:35.898525', 'step': 10241, 'epoch': 2} {'type': 'loss', 'content': 0.14503028988838196, 'timestamp': '2025-10-01 04:30:35.900794', 'step': 10242, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:35.954365', 'step': 10242, 'epoch': 2} {'type': 'loss', 'content': 0.1419416069984436, 'timestamp': '2025-10-01 04:30:35.956391', 'step': 10243, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:36.009323', 'step': 10243, 'epoch': 2} {'type': 'loss', 'content': 0.17071470618247986, 'timestamp': '2025-10-01 04:30:36.015172', 'step': 10244, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:36.067634', 'step': 10244, 'epoch': 2} {'type': 'loss', 'content': 0.03738086670637131, 'timestamp': '2025-10-01 04:30:36.069903', 'step': 10245, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:36.122655', 'step': 10245, 'epoch': 2} {'type': 'loss', 'content': 0.21089500188827515, 'timestamp': '2025-10-01 04:30:36.124775', 'step': 10246, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:36.177251', 'step': 10246, 'epoch': 2} {'type': 'loss', 'content': 0.1390606164932251, 'timestamp': '2025-10-01 04:30:36.179924', 'step': 10247, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:36.234085', 'step': 10247, 'epoch': 2} {'type': 'loss', 'content': 0.12156590819358826, 'timestamp': '2025-10-01 04:30:36.240398', 'step': 10248, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:36.293379', 'step': 10248, 'epoch': 2} {'type': 'loss', 'content': 0.10547272115945816, 'timestamp': '2025-10-01 04:30:36.295576', 'step': 10249, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:36.348936', 'step': 10249, 'epoch': 2} {'type': 'loss', 'content': 0.20120538771152496, 'timestamp': '2025-10-01 04:30:36.351240', 'step': 10250, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:36.404399', 'step': 10250, 'epoch': 2} {'type': 'loss', 'content': 0.09291615337133408, 'timestamp': '2025-10-01 04:30:36.406597', 'step': 10251, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:36.469911', 'step': 10251, 'epoch': 2} {'type': 'loss', 'content': 0.34657424688339233, 'timestamp': '2025-10-01 04:30:36.475911', 'step': 10252, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-01 04:30:49.343614', 'step': 10252, 'epoch': 2} {'type': 'pplx', 'content': 11467.198118003063, 'timestamp': '2025-10-01 04:30:49.346772', 'step': 10252, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:49.399292', 'step': 10252, 'epoch': 2} {'type': 'loss', 'content': 0.10135672241449356, 'timestamp': '2025-10-01 04:30:49.401613', 'step': 10253, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:49.459089', 'step': 10253, 'epoch': 2} {'type': 'loss', 'content': 0.15672187507152557, 'timestamp': '2025-10-01 04:30:49.461277', 'step': 10254, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:49.515104', 'step': 10254, 'epoch': 2} {'type': 'loss', 'content': 0.11216331273317337, 'timestamp': '2025-10-01 04:30:49.517411', 'step': 10255, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:49.570811', 'step': 10255, 'epoch': 2} {'type': 'loss', 'content': 0.09388767182826996, 'timestamp': '2025-10-01 04:30:49.576720', 'step': 10256, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:49.629051', 'step': 10256, 'epoch': 2} {'type': 'loss', 'content': 0.13547329604625702, 'timestamp': '2025-10-01 04:30:49.631315', 'step': 10257, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:30:49.684164', 'step': 10257, 'epoch': 2} {'type': 'loss', 'content': 0.14236976206302643, 'timestamp': '2025-10-01 04:30:49.688159', 'step': 10258, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:49.741396', 'step': 10258, 'epoch': 2} {'type': 'loss', 'content': 0.06448141485452652, 'timestamp': '2025-10-01 04:30:49.744128', 'step': 10259, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:49.797275', 'step': 10259, 'epoch': 2} {'type': 'loss', 'content': 0.123623326420784, 'timestamp': '2025-10-01 04:30:49.803683', 'step': 10260, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:30:49.856410', 'step': 10260, 'epoch': 2} {'type': 'loss', 'content': 0.06842643022537231, 'timestamp': '2025-10-01 04:30:49.858847', 'step': 10261, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:49.914601', 'step': 10261, 'epoch': 2} {'type': 'loss', 'content': 0.13038240373134613, 'timestamp': '2025-10-01 04:30:49.916867', 'step': 10262, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:30:49.970697', 'step': 10262, 'epoch': 2} {'type': 'loss', 'content': 0.06165315955877304, 'timestamp': '2025-10-01 04:30:49.972876', 'step': 10263, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:50.025793', 'step': 10263, 'epoch': 2} {'type': 'loss', 'content': 0.1516476571559906, 'timestamp': '2025-10-01 04:30:50.031506', 'step': 10264, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:50.084201', 'step': 10264, 'epoch': 2} {'type': 'loss', 'content': 0.14079096913337708, 'timestamp': '2025-10-01 04:30:50.086450', 'step': 10265, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:50.139456', 'step': 10265, 'epoch': 2} {'type': 'loss', 'content': 0.09102007001638412, 'timestamp': '2025-10-01 04:30:50.141520', 'step': 10266, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:50.200189', 'step': 10266, 'epoch': 2} {'type': 'loss', 'content': 0.18824969232082367, 'timestamp': '2025-10-01 04:30:50.202399', 'step': 10267, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:50.255027', 'step': 10267, 'epoch': 2} {'type': 'loss', 'content': 0.16732072830200195, 'timestamp': '2025-10-01 04:30:50.260615', 'step': 10268, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:50.312568', 'step': 10268, 'epoch': 2} {'type': 'loss', 'content': 0.2895980775356293, 'timestamp': '2025-10-01 04:30:50.314715', 'step': 10269, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:50.368245', 'step': 10269, 'epoch': 2} {'type': 'loss', 'content': 0.13789795339107513, 'timestamp': '2025-10-01 04:30:50.370395', 'step': 10270, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:50.424522', 'step': 10270, 'epoch': 2} {'type': 'loss', 'content': 0.08517061173915863, 'timestamp': '2025-10-01 04:30:50.426739', 'step': 10271, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:50.479625', 'step': 10271, 'epoch': 2} {'type': 'loss', 'content': 0.04152637720108032, 'timestamp': '2025-10-01 04:30:50.485367', 'step': 10272, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:30:50.537974', 'step': 10272, 'epoch': 2} {'type': 'loss', 'content': 0.12984658777713776, 'timestamp': '2025-10-01 04:30:50.540096', 'step': 10273, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:50.593757', 'step': 10273, 'epoch': 2} {'type': 'loss', 'content': 0.07569461315870285, 'timestamp': '2025-10-01 04:30:50.596210', 'step': 10274, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:50.649199', 'step': 10274, 'epoch': 2} {'type': 'loss', 'content': 0.12175757437944412, 'timestamp': '2025-10-01 04:30:50.651540', 'step': 10275, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:50.705304', 'step': 10275, 'epoch': 2} {'type': 'loss', 'content': 0.1291721612215042, 'timestamp': '2025-10-01 04:30:50.711250', 'step': 10276, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:50.763065', 'step': 10276, 'epoch': 2} {'type': 'loss', 'content': 0.09609867632389069, 'timestamp': '2025-10-01 04:30:50.765411', 'step': 10277, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:50.819566', 'step': 10277, 'epoch': 2} {'type': 'loss', 'content': 0.09094080328941345, 'timestamp': '2025-10-01 04:30:50.821619', 'step': 10278, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:50.874954', 'step': 10278, 'epoch': 2} {'type': 'loss', 'content': 0.2646547257900238, 'timestamp': '2025-10-01 04:30:50.877154', 'step': 10279, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:50.930201', 'step': 10279, 'epoch': 2} {'type': 'loss', 'content': 0.1707673966884613, 'timestamp': '2025-10-01 04:30:50.935992', 'step': 10280, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:50.995219', 'step': 10280, 'epoch': 2} {'type': 'loss', 'content': 0.14545148611068726, 'timestamp': '2025-10-01 04:30:50.997811', 'step': 10281, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:51.050994', 'step': 10281, 'epoch': 2} {'type': 'loss', 'content': 0.19079220294952393, 'timestamp': '2025-10-01 04:30:51.053201', 'step': 10282, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:51.106159', 'step': 10282, 'epoch': 2} {'type': 'loss', 'content': 0.11192136257886887, 'timestamp': '2025-10-01 04:30:51.108446', 'step': 10283, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:51.161610', 'step': 10283, 'epoch': 2} {'type': 'loss', 'content': 0.056679729372262955, 'timestamp': '2025-10-01 04:30:51.168728', 'step': 10284, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:51.222612', 'step': 10284, 'epoch': 2} {'type': 'loss', 'content': 0.11883939802646637, 'timestamp': '2025-10-01 04:30:51.231958', 'step': 10285, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:51.287833', 'step': 10285, 'epoch': 2} {'type': 'loss', 'content': 0.14591625332832336, 'timestamp': '2025-10-01 04:30:51.289779', 'step': 10286, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:51.342756', 'step': 10286, 'epoch': 2} {'type': 'loss', 'content': 0.12564633786678314, 'timestamp': '2025-10-01 04:30:51.347786', 'step': 10287, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:51.400911', 'step': 10287, 'epoch': 2} {'type': 'loss', 'content': 0.06228673830628395, 'timestamp': '2025-10-01 04:30:51.406881', 'step': 10288, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:51.460285', 'step': 10288, 'epoch': 2} {'type': 'loss', 'content': 0.07019134610891342, 'timestamp': '2025-10-01 04:30:51.463009', 'step': 10289, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:51.516166', 'step': 10289, 'epoch': 2} {'type': 'loss', 'content': 0.19122949242591858, 'timestamp': '2025-10-01 04:30:51.518600', 'step': 10290, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:51.571715', 'step': 10290, 'epoch': 2} {'type': 'loss', 'content': 0.16255298256874084, 'timestamp': '2025-10-01 04:30:51.575440', 'step': 10291, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:51.627979', 'step': 10291, 'epoch': 2} {'type': 'loss', 'content': 0.11102976649999619, 'timestamp': '2025-10-01 04:30:51.634202', 'step': 10292, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:51.693831', 'step': 10292, 'epoch': 2} {'type': 'loss', 'content': 0.07931961119174957, 'timestamp': '2025-10-01 04:30:51.698890', 'step': 10293, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:51.755979', 'step': 10293, 'epoch': 2} {'type': 'loss', 'content': 0.10375948995351791, 'timestamp': '2025-10-01 04:30:51.758035', 'step': 10294, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:51.810709', 'step': 10294, 'epoch': 2} {'type': 'loss', 'content': 0.058484505861997604, 'timestamp': '2025-10-01 04:30:51.812928', 'step': 10295, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:51.865891', 'step': 10295, 'epoch': 2} {'type': 'loss', 'content': 0.1160418912768364, 'timestamp': '2025-10-01 04:30:51.871585', 'step': 10296, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:51.923738', 'step': 10296, 'epoch': 2} {'type': 'loss', 'content': 0.1814832091331482, 'timestamp': '2025-10-01 04:30:51.926520', 'step': 10297, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:51.983112', 'step': 10297, 'epoch': 2} {'type': 'loss', 'content': 0.17956945300102234, 'timestamp': '2025-10-01 04:30:51.990844', 'step': 10298, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:52.044079', 'step': 10298, 'epoch': 2} {'type': 'loss', 'content': 0.10736137628555298, 'timestamp': '2025-10-01 04:30:52.046447', 'step': 10299, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:52.099165', 'step': 10299, 'epoch': 2} {'type': 'loss', 'content': 0.15818969905376434, 'timestamp': '2025-10-01 04:30:52.104914', 'step': 10300, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:52.157579', 'step': 10300, 'epoch': 2} {'type': 'loss', 'content': 0.22383086383342743, 'timestamp': '2025-10-01 04:30:52.159923', 'step': 10301, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:52.213131', 'step': 10301, 'epoch': 2} {'type': 'loss', 'content': 0.10324829816818237, 'timestamp': '2025-10-01 04:30:52.215327', 'step': 10302, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:52.274477', 'step': 10302, 'epoch': 2} {'type': 'loss', 'content': 0.12901045382022858, 'timestamp': '2025-10-01 04:30:52.276827', 'step': 10303, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:52.330788', 'step': 10303, 'epoch': 2} {'type': 'loss', 'content': 0.13243336975574493, 'timestamp': '2025-10-01 04:30:52.336957', 'step': 10304, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:52.390065', 'step': 10304, 'epoch': 2} {'type': 'loss', 'content': 0.08554667979478836, 'timestamp': '2025-10-01 04:30:52.393764', 'step': 10305, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:52.452401', 'step': 10305, 'epoch': 2} {'type': 'loss', 'content': 0.1617376208305359, 'timestamp': '2025-10-01 04:30:52.456012', 'step': 10306, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:52.509449', 'step': 10306, 'epoch': 2} {'type': 'loss', 'content': 0.09423423558473587, 'timestamp': '2025-10-01 04:30:52.511651', 'step': 10307, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:52.565190', 'step': 10307, 'epoch': 2} {'type': 'loss', 'content': 0.08798027783632278, 'timestamp': '2025-10-01 04:30:52.571069', 'step': 10308, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:52.623580', 'step': 10308, 'epoch': 2} {'type': 'loss', 'content': 0.1362600326538086, 'timestamp': '2025-10-01 04:30:52.625783', 'step': 10309, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:52.678536', 'step': 10309, 'epoch': 2} {'type': 'loss', 'content': 0.10315950214862823, 'timestamp': '2025-10-01 04:30:52.680716', 'step': 10310, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:52.734197', 'step': 10310, 'epoch': 2} {'type': 'loss', 'content': 0.11483845859766006, 'timestamp': '2025-10-01 04:30:52.736774', 'step': 10311, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:52.790623', 'step': 10311, 'epoch': 2} {'type': 'loss', 'content': 0.10731678456068039, 'timestamp': '2025-10-01 04:30:52.796551', 'step': 10312, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:52.849894', 'step': 10312, 'epoch': 2} {'type': 'loss', 'content': 0.10836140811443329, 'timestamp': '2025-10-01 04:30:52.852044', 'step': 10313, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:52.905348', 'step': 10313, 'epoch': 2} {'type': 'loss', 'content': 0.11031271517276764, 'timestamp': '2025-10-01 04:30:52.907697', 'step': 10314, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:52.961552', 'step': 10314, 'epoch': 2} {'type': 'loss', 'content': 0.17035089433193207, 'timestamp': '2025-10-01 04:30:52.963842', 'step': 10315, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:53.017268', 'step': 10315, 'epoch': 2} {'type': 'loss', 'content': 0.12622971832752228, 'timestamp': '2025-10-01 04:30:53.023299', 'step': 10316, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:53.076020', 'step': 10316, 'epoch': 2} {'type': 'loss', 'content': 0.19622352719306946, 'timestamp': '2025-10-01 04:30:53.078693', 'step': 10317, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:53.132336', 'step': 10317, 'epoch': 2} {'type': 'loss', 'content': 0.1208467110991478, 'timestamp': '2025-10-01 04:30:53.134715', 'step': 10318, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:53.188648', 'step': 10318, 'epoch': 2} {'type': 'loss', 'content': 0.24283796548843384, 'timestamp': '2025-10-01 04:30:53.192430', 'step': 10319, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:53.246481', 'step': 10319, 'epoch': 2} {'type': 'loss', 'content': 0.18604007363319397, 'timestamp': '2025-10-01 04:30:53.252589', 'step': 10320, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:53.309313', 'step': 10320, 'epoch': 2} {'type': 'loss', 'content': 0.11042491346597672, 'timestamp': '2025-10-01 04:30:53.311661', 'step': 10321, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:53.365375', 'step': 10321, 'epoch': 2} {'type': 'loss', 'content': 0.17197534441947937, 'timestamp': '2025-10-01 04:30:53.367846', 'step': 10322, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:53.421812', 'step': 10322, 'epoch': 2} {'type': 'loss', 'content': 0.04165070876479149, 'timestamp': '2025-10-01 04:30:53.424282', 'step': 10323, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:53.478287', 'step': 10323, 'epoch': 2} {'type': 'loss', 'content': 0.09558999538421631, 'timestamp': '2025-10-01 04:30:53.484406', 'step': 10324, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:53.538216', 'step': 10324, 'epoch': 2} {'type': 'loss', 'content': 0.12847110629081726, 'timestamp': '2025-10-01 04:30:53.540564', 'step': 10325, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:53.594154', 'step': 10325, 'epoch': 2} {'type': 'loss', 'content': 0.14280228316783905, 'timestamp': '2025-10-01 04:30:53.596361', 'step': 10326, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:53.649855', 'step': 10326, 'epoch': 2} {'type': 'loss', 'content': 0.1301257312297821, 'timestamp': '2025-10-01 04:30:53.651928', 'step': 10327, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:53.705016', 'step': 10327, 'epoch': 2} {'type': 'loss', 'content': 0.11517217755317688, 'timestamp': '2025-10-01 04:30:53.710960', 'step': 10328, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:30:53.764189', 'step': 10328, 'epoch': 2} {'type': 'loss', 'content': 0.17614997923374176, 'timestamp': '2025-10-01 04:30:53.766346', 'step': 10329, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:53.819833', 'step': 10329, 'epoch': 2} {'type': 'loss', 'content': 0.0713636577129364, 'timestamp': '2025-10-01 04:30:53.822072', 'step': 10330, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:53.876342', 'step': 10330, 'epoch': 2} {'type': 'loss', 'content': 0.11513937264680862, 'timestamp': '2025-10-01 04:30:53.879108', 'step': 10331, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:53.942001', 'step': 10331, 'epoch': 2} {'type': 'loss', 'content': 0.06461178511381149, 'timestamp': '2025-10-01 04:30:53.949958', 'step': 10332, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:54.003692', 'step': 10332, 'epoch': 2} {'type': 'loss', 'content': 0.1736411601305008, 'timestamp': '2025-10-01 04:30:54.006552', 'step': 10333, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:54.060778', 'step': 10333, 'epoch': 2} {'type': 'loss', 'content': 0.09469736367464066, 'timestamp': '2025-10-01 04:30:54.063201', 'step': 10334, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:30:54.117931', 'step': 10334, 'epoch': 2} {'type': 'loss', 'content': 0.11575592309236526, 'timestamp': '2025-10-01 04:30:54.121548', 'step': 10335, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:54.175391', 'step': 10335, 'epoch': 2} {'type': 'loss', 'content': 0.18278948962688446, 'timestamp': '2025-10-01 04:30:54.181810', 'step': 10336, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:54.235665', 'step': 10336, 'epoch': 2} {'type': 'loss', 'content': 0.17946399748325348, 'timestamp': '2025-10-01 04:30:54.238140', 'step': 10337, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:54.291072', 'step': 10337, 'epoch': 2} {'type': 'loss', 'content': 0.1395518034696579, 'timestamp': '2025-10-01 04:30:54.294380', 'step': 10338, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:54.352534', 'step': 10338, 'epoch': 2} {'type': 'loss', 'content': 0.115459144115448, 'timestamp': '2025-10-01 04:30:54.354848', 'step': 10339, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:54.408254', 'step': 10339, 'epoch': 2} {'type': 'loss', 'content': 0.1268853396177292, 'timestamp': '2025-10-01 04:30:54.414690', 'step': 10340, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:30:54.468859', 'step': 10340, 'epoch': 2} {'type': 'loss', 'content': 0.15638835728168488, 'timestamp': '2025-10-01 04:30:54.471572', 'step': 10341, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:54.528175', 'step': 10341, 'epoch': 2} {'type': 'loss', 'content': 0.15718352794647217, 'timestamp': '2025-10-01 04:30:54.530780', 'step': 10342, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:54.585494', 'step': 10342, 'epoch': 2} {'type': 'loss', 'content': 0.10018213093280792, 'timestamp': '2025-10-01 04:30:54.588223', 'step': 10343, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:54.649152', 'step': 10343, 'epoch': 2} {'type': 'loss', 'content': 0.19712921977043152, 'timestamp': '2025-10-01 04:30:54.655717', 'step': 10344, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:54.709968', 'step': 10344, 'epoch': 2} {'type': 'loss', 'content': 0.07470531016588211, 'timestamp': '2025-10-01 04:30:54.717771', 'step': 10345, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:54.772627', 'step': 10345, 'epoch': 2} {'type': 'loss', 'content': 0.045637793838977814, 'timestamp': '2025-10-01 04:30:54.775392', 'step': 10346, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:54.830808', 'step': 10346, 'epoch': 2} {'type': 'loss', 'content': 0.13368403911590576, 'timestamp': '2025-10-01 04:30:54.833628', 'step': 10347, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:54.889056', 'step': 10347, 'epoch': 2} {'type': 'loss', 'content': 0.10292080044746399, 'timestamp': '2025-10-01 04:30:54.895354', 'step': 10348, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:54.949971', 'step': 10348, 'epoch': 2} {'type': 'loss', 'content': 0.1599043756723404, 'timestamp': '2025-10-01 04:30:54.952442', 'step': 10349, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:55.006997', 'step': 10349, 'epoch': 2} {'type': 'loss', 'content': 0.11911667138338089, 'timestamp': '2025-10-01 04:30:55.009427', 'step': 10350, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:55.063872', 'step': 10350, 'epoch': 2} {'type': 'loss', 'content': 0.06411297619342804, 'timestamp': '2025-10-01 04:30:55.066138', 'step': 10351, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:30:55.120115', 'step': 10351, 'epoch': 2} {'type': 'loss', 'content': 0.07268046587705612, 'timestamp': '2025-10-01 04:30:55.126243', 'step': 10352, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:55.179159', 'step': 10352, 'epoch': 2} {'type': 'loss', 'content': 0.08643919974565506, 'timestamp': '2025-10-01 04:30:55.181300', 'step': 10353, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:55.234474', 'step': 10353, 'epoch': 2} {'type': 'loss', 'content': 0.11999605596065521, 'timestamp': '2025-10-01 04:30:55.237121', 'step': 10354, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:55.290390', 'step': 10354, 'epoch': 2} {'type': 'loss', 'content': 0.08831051737070084, 'timestamp': '2025-10-01 04:30:55.292531', 'step': 10355, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:30:55.345256', 'step': 10355, 'epoch': 2} {'type': 'loss', 'content': 0.16672182083129883, 'timestamp': '2025-10-01 04:30:55.351116', 'step': 10356, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:55.411810', 'step': 10356, 'epoch': 2} {'type': 'loss', 'content': 0.05328715592622757, 'timestamp': '2025-10-01 04:30:55.414005', 'step': 10357, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:55.467233', 'step': 10357, 'epoch': 2} {'type': 'loss', 'content': 0.12843741476535797, 'timestamp': '2025-10-01 04:30:55.469358', 'step': 10358, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:55.523682', 'step': 10358, 'epoch': 2} {'type': 'loss', 'content': 0.19879566133022308, 'timestamp': '2025-10-01 04:30:55.527439', 'step': 10359, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:55.580891', 'step': 10359, 'epoch': 2} {'type': 'loss', 'content': 0.1182832345366478, 'timestamp': '2025-10-01 04:30:55.586997', 'step': 10360, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:55.640022', 'step': 10360, 'epoch': 2} {'type': 'loss', 'content': 0.12610425055027008, 'timestamp': '2025-10-01 04:30:55.642354', 'step': 10361, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:55.698185', 'step': 10361, 'epoch': 2} {'type': 'loss', 'content': 0.1196725070476532, 'timestamp': '2025-10-01 04:30:55.700453', 'step': 10362, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:55.753946', 'step': 10362, 'epoch': 2} {'type': 'loss', 'content': 0.15880067646503448, 'timestamp': '2025-10-01 04:30:55.755992', 'step': 10363, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:55.809042', 'step': 10363, 'epoch': 2} {'type': 'loss', 'content': 0.04030760005116463, 'timestamp': '2025-10-01 04:30:55.814911', 'step': 10364, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:55.868662', 'step': 10364, 'epoch': 2} {'type': 'loss', 'content': 0.1500653773546219, 'timestamp': '2025-10-01 04:30:55.871086', 'step': 10365, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:55.924760', 'step': 10365, 'epoch': 2} {'type': 'loss', 'content': 0.20005182921886444, 'timestamp': '2025-10-01 04:30:55.927799', 'step': 10366, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:55.981365', 'step': 10366, 'epoch': 2} {'type': 'loss', 'content': 0.08549348264932632, 'timestamp': '2025-10-01 04:30:55.983577', 'step': 10367, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:56.037252', 'step': 10367, 'epoch': 2} {'type': 'loss', 'content': 0.1390378177165985, 'timestamp': '2025-10-01 04:30:56.043074', 'step': 10368, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:56.096085', 'step': 10368, 'epoch': 2} {'type': 'loss', 'content': 0.10702432692050934, 'timestamp': '2025-10-01 04:30:56.098506', 'step': 10369, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:56.155519', 'step': 10369, 'epoch': 2} {'type': 'loss', 'content': 0.09480691701173782, 'timestamp': '2025-10-01 04:30:56.157604', 'step': 10370, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:56.210581', 'step': 10370, 'epoch': 2} {'type': 'loss', 'content': 0.09426916390657425, 'timestamp': '2025-10-01 04:30:56.213132', 'step': 10371, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:56.266566', 'step': 10371, 'epoch': 2} {'type': 'loss', 'content': 0.15470586717128754, 'timestamp': '2025-10-01 04:30:56.272377', 'step': 10372, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:56.327174', 'step': 10372, 'epoch': 2} {'type': 'loss', 'content': 0.10236015170812607, 'timestamp': '2025-10-01 04:30:56.329476', 'step': 10373, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:56.383226', 'step': 10373, 'epoch': 2} {'type': 'loss', 'content': 0.11929340660572052, 'timestamp': '2025-10-01 04:30:56.385524', 'step': 10374, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:56.439575', 'step': 10374, 'epoch': 2} {'type': 'loss', 'content': 0.1406937837600708, 'timestamp': '2025-10-01 04:30:56.441953', 'step': 10375, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:56.495696', 'step': 10375, 'epoch': 2} {'type': 'loss', 'content': 0.13566088676452637, 'timestamp': '2025-10-01 04:30:56.501650', 'step': 10376, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:56.554889', 'step': 10376, 'epoch': 2} {'type': 'loss', 'content': 0.08915669471025467, 'timestamp': '2025-10-01 04:30:56.561263', 'step': 10377, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:56.615808', 'step': 10377, 'epoch': 2} {'type': 'loss', 'content': 0.13439233601093292, 'timestamp': '2025-10-01 04:30:56.619926', 'step': 10378, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:56.676310', 'step': 10378, 'epoch': 2} {'type': 'loss', 'content': 0.13893002271652222, 'timestamp': '2025-10-01 04:30:56.678668', 'step': 10379, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:56.732253', 'step': 10379, 'epoch': 2} {'type': 'loss', 'content': 0.20305533707141876, 'timestamp': '2025-10-01 04:30:56.738353', 'step': 10380, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:56.791200', 'step': 10380, 'epoch': 2} {'type': 'loss', 'content': 0.1678551584482193, 'timestamp': '2025-10-01 04:30:56.793444', 'step': 10381, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:56.846943', 'step': 10381, 'epoch': 2} {'type': 'loss', 'content': 0.10021147131919861, 'timestamp': '2025-10-01 04:30:56.849165', 'step': 10382, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:56.902414', 'step': 10382, 'epoch': 2} {'type': 'loss', 'content': 0.12679477035999298, 'timestamp': '2025-10-01 04:30:56.904667', 'step': 10383, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:30:56.958562', 'step': 10383, 'epoch': 2} {'type': 'loss', 'content': 0.12670156359672546, 'timestamp': '2025-10-01 04:30:56.964824', 'step': 10384, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:57.018013', 'step': 10384, 'epoch': 2} {'type': 'loss', 'content': 0.16998060047626495, 'timestamp': '2025-10-01 04:30:57.020077', 'step': 10385, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:30:57.073659', 'step': 10385, 'epoch': 2} {'type': 'loss', 'content': 0.12672294676303864, 'timestamp': '2025-10-01 04:30:57.075774', 'step': 10386, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:57.129268', 'step': 10386, 'epoch': 2} {'type': 'loss', 'content': 0.053432878106832504, 'timestamp': '2025-10-01 04:30:57.131482', 'step': 10387, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:57.185300', 'step': 10387, 'epoch': 2} {'type': 'loss', 'content': 0.11064690351486206, 'timestamp': '2025-10-01 04:30:57.191125', 'step': 10388, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:57.245116', 'step': 10388, 'epoch': 2} {'type': 'loss', 'content': 0.17135266959667206, 'timestamp': '2025-10-01 04:30:57.247467', 'step': 10389, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:57.300896', 'step': 10389, 'epoch': 2} {'type': 'loss', 'content': 0.10619549453258514, 'timestamp': '2025-10-01 04:30:57.303073', 'step': 10390, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:57.357218', 'step': 10390, 'epoch': 2} {'type': 'loss', 'content': 0.12686684727668762, 'timestamp': '2025-10-01 04:30:57.360661', 'step': 10391, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:57.413654', 'step': 10391, 'epoch': 2} {'type': 'loss', 'content': 0.1403995305299759, 'timestamp': '2025-10-01 04:30:57.419329', 'step': 10392, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:57.472150', 'step': 10392, 'epoch': 2} {'type': 'loss', 'content': 0.12742586433887482, 'timestamp': '2025-10-01 04:30:57.474343', 'step': 10393, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:57.527666', 'step': 10393, 'epoch': 2} {'type': 'loss', 'content': 0.20317135751247406, 'timestamp': '2025-10-01 04:30:57.529894', 'step': 10394, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:57.583241', 'step': 10394, 'epoch': 2} {'type': 'loss', 'content': 0.09557978063821793, 'timestamp': '2025-10-01 04:30:57.585516', 'step': 10395, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:30:57.639106', 'step': 10395, 'epoch': 2} {'type': 'loss', 'content': 0.19049842655658722, 'timestamp': '2025-10-01 04:30:57.644775', 'step': 10396, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:57.697445', 'step': 10396, 'epoch': 2} {'type': 'loss', 'content': 0.17407920956611633, 'timestamp': '2025-10-01 04:30:57.699673', 'step': 10397, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:57.752836', 'step': 10397, 'epoch': 2} {'type': 'loss', 'content': 0.21715635061264038, 'timestamp': '2025-10-01 04:30:57.755378', 'step': 10398, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:57.808825', 'step': 10398, 'epoch': 2} {'type': 'loss', 'content': 0.15472328662872314, 'timestamp': '2025-10-01 04:30:57.810961', 'step': 10399, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:57.865123', 'step': 10399, 'epoch': 2} {'type': 'loss', 'content': 0.1737358272075653, 'timestamp': '2025-10-01 04:30:57.870833', 'step': 10400, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:57.924007', 'step': 10400, 'epoch': 2} {'type': 'loss', 'content': 0.14487795531749725, 'timestamp': '2025-10-01 04:30:57.926196', 'step': 10401, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:57.980416', 'step': 10401, 'epoch': 2} {'type': 'loss', 'content': 0.09105466306209564, 'timestamp': '2025-10-01 04:30:57.984271', 'step': 10402, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:58.038999', 'step': 10402, 'epoch': 2} {'type': 'loss', 'content': 0.11156786233186722, 'timestamp': '2025-10-01 04:30:58.041337', 'step': 10403, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:58.095012', 'step': 10403, 'epoch': 2} {'type': 'loss', 'content': 0.07665479928255081, 'timestamp': '2025-10-01 04:30:58.101062', 'step': 10404, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:58.160649', 'step': 10404, 'epoch': 2} {'type': 'loss', 'content': 0.13336321711540222, 'timestamp': '2025-10-01 04:30:58.162898', 'step': 10405, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:58.216000', 'step': 10405, 'epoch': 2} {'type': 'loss', 'content': 0.11919236928224564, 'timestamp': '2025-10-01 04:30:58.218237', 'step': 10406, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:58.272345', 'step': 10406, 'epoch': 2} {'type': 'loss', 'content': 0.09750258922576904, 'timestamp': '2025-10-01 04:30:58.285537', 'step': 10407, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:58.346342', 'step': 10407, 'epoch': 2} {'type': 'loss', 'content': 0.11946134269237518, 'timestamp': '2025-10-01 04:30:58.351921', 'step': 10408, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:58.404447', 'step': 10408, 'epoch': 2} {'type': 'loss', 'content': 0.08701145648956299, 'timestamp': '2025-10-01 04:30:58.406628', 'step': 10409, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:58.459878', 'step': 10409, 'epoch': 2} {'type': 'loss', 'content': 0.0882364958524704, 'timestamp': '2025-10-01 04:30:58.461897', 'step': 10410, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:58.514808', 'step': 10410, 'epoch': 2} {'type': 'loss', 'content': 0.15246984362602234, 'timestamp': '2025-10-01 04:30:58.517005', 'step': 10411, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:58.570335', 'step': 10411, 'epoch': 2} {'type': 'loss', 'content': 0.2292281836271286, 'timestamp': '2025-10-01 04:30:58.576207', 'step': 10412, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:58.629094', 'step': 10412, 'epoch': 2} {'type': 'loss', 'content': 0.14707553386688232, 'timestamp': '2025-10-01 04:30:58.631454', 'step': 10413, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:58.684548', 'step': 10413, 'epoch': 2} {'type': 'loss', 'content': 0.10417380928993225, 'timestamp': '2025-10-01 04:30:58.686671', 'step': 10414, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:58.739323', 'step': 10414, 'epoch': 2} {'type': 'loss', 'content': 0.12117107212543488, 'timestamp': '2025-10-01 04:30:58.741779', 'step': 10415, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:58.795100', 'step': 10415, 'epoch': 2} {'type': 'loss', 'content': 0.0921621024608612, 'timestamp': '2025-10-01 04:30:58.801030', 'step': 10416, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:58.853671', 'step': 10416, 'epoch': 2} {'type': 'loss', 'content': 0.13306854665279388, 'timestamp': '2025-10-01 04:30:58.855938', 'step': 10417, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:58.908577', 'step': 10417, 'epoch': 2} {'type': 'loss', 'content': 0.2129155546426773, 'timestamp': '2025-10-01 04:30:58.910912', 'step': 10418, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:58.965530', 'step': 10418, 'epoch': 2} {'type': 'loss', 'content': 0.1286550611257553, 'timestamp': '2025-10-01 04:30:58.967898', 'step': 10419, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:59.021276', 'step': 10419, 'epoch': 2} {'type': 'loss', 'content': 0.16245734691619873, 'timestamp': '2025-10-01 04:30:59.027024', 'step': 10420, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:59.079484', 'step': 10420, 'epoch': 2} {'type': 'loss', 'content': 0.16890905797481537, 'timestamp': '2025-10-01 04:30:59.082323', 'step': 10421, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:59.135537', 'step': 10421, 'epoch': 2} {'type': 'loss', 'content': 0.1499205231666565, 'timestamp': '2025-10-01 04:30:59.137776', 'step': 10422, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:59.190901', 'step': 10422, 'epoch': 2} {'type': 'loss', 'content': 0.16132009029388428, 'timestamp': '2025-10-01 04:30:59.193422', 'step': 10423, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:59.246648', 'step': 10423, 'epoch': 2} {'type': 'loss', 'content': 0.14083288609981537, 'timestamp': '2025-10-01 04:30:59.252582', 'step': 10424, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:59.305227', 'step': 10424, 'epoch': 2} {'type': 'loss', 'content': 0.1534610390663147, 'timestamp': '2025-10-01 04:30:59.307404', 'step': 10425, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:59.360432', 'step': 10425, 'epoch': 2} {'type': 'loss', 'content': 0.1578390747308731, 'timestamp': '2025-10-01 04:30:59.362532', 'step': 10426, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:59.415834', 'step': 10426, 'epoch': 2} {'type': 'loss', 'content': 0.1272207647562027, 'timestamp': '2025-10-01 04:30:59.417903', 'step': 10427, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:59.471103', 'step': 10427, 'epoch': 2} {'type': 'loss', 'content': 0.11175592243671417, 'timestamp': '2025-10-01 04:30:59.476739', 'step': 10428, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:59.529879', 'step': 10428, 'epoch': 2} {'type': 'loss', 'content': 0.11638757586479187, 'timestamp': '2025-10-01 04:30:59.532235', 'step': 10429, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:59.585641', 'step': 10429, 'epoch': 2} {'type': 'loss', 'content': 0.10300111770629883, 'timestamp': '2025-10-01 04:30:59.589602', 'step': 10430, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:30:59.643910', 'step': 10430, 'epoch': 2} {'type': 'loss', 'content': 0.17948617041110992, 'timestamp': '2025-10-01 04:30:59.646001', 'step': 10431, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:59.699321', 'step': 10431, 'epoch': 2} {'type': 'loss', 'content': 0.14262914657592773, 'timestamp': '2025-10-01 04:30:59.706768', 'step': 10432, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:30:59.759801', 'step': 10432, 'epoch': 2} {'type': 'loss', 'content': 0.1319970041513443, 'timestamp': '2025-10-01 04:30:59.762051', 'step': 10433, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:30:59.815176', 'step': 10433, 'epoch': 2} {'type': 'loss', 'content': 0.11317060887813568, 'timestamp': '2025-10-01 04:30:59.817460', 'step': 10434, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:30:59.870980', 'step': 10434, 'epoch': 2} {'type': 'loss', 'content': 0.11870480328798294, 'timestamp': '2025-10-01 04:30:59.873087', 'step': 10435, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:30:59.926301', 'step': 10435, 'epoch': 2} {'type': 'loss', 'content': 0.066129669547081, 'timestamp': '2025-10-01 04:30:59.932182', 'step': 10436, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-01 04:30:59.985176', 'step': 10436, 'epoch': 2} {'type': 'loss', 'content': 0.14910657703876495, 'timestamp': '2025-10-01 04:30:59.987521', 'step': 10437, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:00.040902', 'step': 10437, 'epoch': 2} {'type': 'loss', 'content': 0.0743657648563385, 'timestamp': '2025-10-01 04:31:00.043075', 'step': 10438, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:00.096305', 'step': 10438, 'epoch': 2} {'type': 'loss', 'content': 0.047360654920339584, 'timestamp': '2025-10-01 04:31:00.098539', 'step': 10439, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:00.151643', 'step': 10439, 'epoch': 2} {'type': 'loss', 'content': 0.18063272535800934, 'timestamp': '2025-10-01 04:31:00.157445', 'step': 10440, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:00.210692', 'step': 10440, 'epoch': 2} {'type': 'loss', 'content': 0.09027832746505737, 'timestamp': '2025-10-01 04:31:00.212786', 'step': 10441, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:00.269008', 'step': 10441, 'epoch': 2} {'type': 'loss', 'content': 0.1112900823354721, 'timestamp': '2025-10-01 04:31:00.271126', 'step': 10442, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:00.324744', 'step': 10442, 'epoch': 2} {'type': 'loss', 'content': 0.08075117319822311, 'timestamp': '2025-10-01 04:31:00.326880', 'step': 10443, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:00.380249', 'step': 10443, 'epoch': 2} {'type': 'loss', 'content': 0.17557671666145325, 'timestamp': '2025-10-01 04:31:00.387254', 'step': 10444, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:00.440227', 'step': 10444, 'epoch': 2} {'type': 'loss', 'content': 0.08671346306800842, 'timestamp': '2025-10-01 04:31:00.442784', 'step': 10445, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:31:00.496428', 'step': 10445, 'epoch': 2} {'type': 'loss', 'content': 0.10073594003915787, 'timestamp': '2025-10-01 04:31:00.499123', 'step': 10446, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:00.554178', 'step': 10446, 'epoch': 2} {'type': 'loss', 'content': 0.06125335395336151, 'timestamp': '2025-10-01 04:31:00.556374', 'step': 10447, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:31:00.617005', 'step': 10447, 'epoch': 2} {'type': 'loss', 'content': 0.19398503005504608, 'timestamp': '2025-10-01 04:31:00.623027', 'step': 10448, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:00.676028', 'step': 10448, 'epoch': 2} {'type': 'loss', 'content': 0.17543141543865204, 'timestamp': '2025-10-01 04:31:00.678373', 'step': 10449, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:00.732374', 'step': 10449, 'epoch': 2} {'type': 'loss', 'content': 0.1300741285085678, 'timestamp': '2025-10-01 04:31:00.734562', 'step': 10450, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:00.787790', 'step': 10450, 'epoch': 2} {'type': 'loss', 'content': 0.11145173013210297, 'timestamp': '2025-10-01 04:31:00.790490', 'step': 10451, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:00.844070', 'step': 10451, 'epoch': 2} {'type': 'loss', 'content': 0.20339900255203247, 'timestamp': '2025-10-01 04:31:00.850183', 'step': 10452, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:00.905126', 'step': 10452, 'epoch': 2} {'type': 'loss', 'content': 0.17983542382717133, 'timestamp': '2025-10-01 04:31:00.909709', 'step': 10453, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:00.968011', 'step': 10453, 'epoch': 2} {'type': 'loss', 'content': 0.15901343524456024, 'timestamp': '2025-10-01 04:31:00.970838', 'step': 10454, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:01.039016', 'step': 10454, 'epoch': 2} {'type': 'loss', 'content': 0.14894647896289825, 'timestamp': '2025-10-01 04:31:01.042027', 'step': 10455, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:01.101789', 'step': 10455, 'epoch': 2} {'type': 'loss', 'content': 0.10520760715007782, 'timestamp': '2025-10-01 04:31:01.111587', 'step': 10456, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:01.178262', 'step': 10456, 'epoch': 2} {'type': 'loss', 'content': 0.10050360858440399, 'timestamp': '2025-10-01 04:31:01.181250', 'step': 10457, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:01.237434', 'step': 10457, 'epoch': 2} {'type': 'loss', 'content': 0.130447655916214, 'timestamp': '2025-10-01 04:31:01.240914', 'step': 10458, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:31:01.301010', 'step': 10458, 'epoch': 2} {'type': 'loss', 'content': 0.12485837936401367, 'timestamp': '2025-10-01 04:31:01.312395', 'step': 10459, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:01.384783', 'step': 10459, 'epoch': 2} {'type': 'loss', 'content': 0.11205440014600754, 'timestamp': '2025-10-01 04:31:01.393796', 'step': 10460, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:01.456950', 'step': 10460, 'epoch': 2} {'type': 'loss', 'content': 0.06985782086849213, 'timestamp': '2025-10-01 04:31:01.462976', 'step': 10461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:01.524920', 'step': 10461, 'epoch': 2} {'type': 'loss', 'content': 0.15131404995918274, 'timestamp': '2025-10-01 04:31:01.529058', 'step': 10462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:01.591468', 'step': 10462, 'epoch': 2} {'type': 'loss', 'content': 0.1121760755777359, 'timestamp': '2025-10-01 04:31:01.598633', 'step': 10463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:01.672673', 'step': 10463, 'epoch': 2} {'type': 'loss', 'content': 0.09679342061281204, 'timestamp': '2025-10-01 04:31:01.687949', 'step': 10464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:01.757931', 'step': 10464, 'epoch': 2} {'type': 'loss', 'content': 0.04631052166223526, 'timestamp': '2025-10-01 04:31:01.768162', 'step': 10465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:01.829068', 'step': 10465, 'epoch': 2} {'type': 'loss', 'content': 0.13810579478740692, 'timestamp': '2025-10-01 04:31:01.840641', 'step': 10466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:01.913604', 'step': 10466, 'epoch': 2} {'type': 'loss', 'content': 0.15277567505836487, 'timestamp': '2025-10-01 04:31:01.922684', 'step': 10467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:01.981640', 'step': 10467, 'epoch': 2} {'type': 'loss', 'content': 0.05158303678035736, 'timestamp': '2025-10-01 04:31:01.987470', 'step': 10468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:02.040981', 'step': 10468, 'epoch': 2} {'type': 'loss', 'content': 0.17456179857254028, 'timestamp': '2025-10-01 04:31:02.043647', 'step': 10469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:31:02.097870', 'step': 10469, 'epoch': 2} {'type': 'loss', 'content': 0.19310705363750458, 'timestamp': '2025-10-01 04:31:02.100122', 'step': 10470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:02.154281', 'step': 10470, 'epoch': 2} {'type': 'loss', 'content': 0.10494856536388397, 'timestamp': '2025-10-01 04:31:02.160208', 'step': 10471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:02.216125', 'step': 10471, 'epoch': 2} {'type': 'loss', 'content': 0.03749290108680725, 'timestamp': '2025-10-01 04:31:02.221734', 'step': 10472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:02.274241', 'step': 10472, 'epoch': 2} {'type': 'loss', 'content': 0.09861081093549728, 'timestamp': '2025-10-01 04:31:02.276454', 'step': 10473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:02.329587', 'step': 10473, 'epoch': 2} {'type': 'loss', 'content': 0.0797213539481163, 'timestamp': '2025-10-01 04:31:02.331691', 'step': 10474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:02.385147', 'step': 10474, 'epoch': 2} {'type': 'loss', 'content': 0.0644230842590332, 'timestamp': '2025-10-01 04:31:02.387444', 'step': 10475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:02.440262', 'step': 10475, 'epoch': 2} {'type': 'loss', 'content': 0.13488943874835968, 'timestamp': '2025-10-01 04:31:02.446133', 'step': 10476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:02.499294', 'step': 10476, 'epoch': 2} {'type': 'loss', 'content': 0.16558510065078735, 'timestamp': '2025-10-01 04:31:02.501567', 'step': 10477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:02.554794', 'step': 10477, 'epoch': 2} {'type': 'loss', 'content': 0.1794845014810562, 'timestamp': '2025-10-01 04:31:02.557959', 'step': 10478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:02.611115', 'step': 10478, 'epoch': 2} {'type': 'loss', 'content': 0.06704086065292358, 'timestamp': '2025-10-01 04:31:02.613484', 'step': 10479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:02.666795', 'step': 10479, 'epoch': 2} {'type': 'loss', 'content': 0.12439165264368057, 'timestamp': '2025-10-01 04:31:02.672409', 'step': 10480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:02.724770', 'step': 10480, 'epoch': 2} {'type': 'loss', 'content': 0.10482221096754074, 'timestamp': '2025-10-01 04:31:02.726777', 'step': 10481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:02.779630', 'step': 10481, 'epoch': 2} {'type': 'loss', 'content': 0.1480579376220703, 'timestamp': '2025-10-01 04:31:02.781824', 'step': 10482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:02.839697', 'step': 10482, 'epoch': 2} {'type': 'loss', 'content': 0.18154576420783997, 'timestamp': '2025-10-01 04:31:02.841885', 'step': 10483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:02.894819', 'step': 10483, 'epoch': 2} {'type': 'loss', 'content': 0.10830181837081909, 'timestamp': '2025-10-01 04:31:02.900686', 'step': 10484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:02.953184', 'step': 10484, 'epoch': 2} {'type': 'loss', 'content': 0.12440966069698334, 'timestamp': '2025-10-01 04:31:02.955915', 'step': 10485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:03.009678', 'step': 10485, 'epoch': 2} {'type': 'loss', 'content': 0.04926885664463043, 'timestamp': '2025-10-01 04:31:03.011871', 'step': 10486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:03.064895', 'step': 10486, 'epoch': 2} {'type': 'loss', 'content': 0.13330312073230743, 'timestamp': '2025-10-01 04:31:03.066834', 'step': 10487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:03.119118', 'step': 10487, 'epoch': 2} {'type': 'loss', 'content': 0.17281220853328705, 'timestamp': '2025-10-01 04:31:03.124812', 'step': 10488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:03.177685', 'step': 10488, 'epoch': 2} {'type': 'loss', 'content': 0.09758499264717102, 'timestamp': '2025-10-01 04:31:03.179882', 'step': 10489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:03.232953', 'step': 10489, 'epoch': 2} {'type': 'loss', 'content': 0.1371491700410843, 'timestamp': '2025-10-01 04:31:03.235380', 'step': 10490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:03.289407', 'step': 10490, 'epoch': 2} {'type': 'loss', 'content': 0.13517434895038605, 'timestamp': '2025-10-01 04:31:03.291891', 'step': 10491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:03.344808', 'step': 10491, 'epoch': 2} {'type': 'loss', 'content': 0.13488154113292694, 'timestamp': '2025-10-01 04:31:03.350706', 'step': 10492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:03.403492', 'step': 10492, 'epoch': 2} {'type': 'loss', 'content': 0.12455493211746216, 'timestamp': '2025-10-01 04:31:03.407133', 'step': 10493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:03.460824', 'step': 10493, 'epoch': 2} {'type': 'loss', 'content': 0.15372048318386078, 'timestamp': '2025-10-01 04:31:03.462866', 'step': 10494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:03.516945', 'step': 10494, 'epoch': 2} {'type': 'loss', 'content': 0.11295171827077866, 'timestamp': '2025-10-01 04:31:03.519471', 'step': 10495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:03.572642', 'step': 10495, 'epoch': 2} {'type': 'loss', 'content': 0.13860183954238892, 'timestamp': '2025-10-01 04:31:03.578948', 'step': 10496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:03.631910', 'step': 10496, 'epoch': 2} {'type': 'loss', 'content': 0.13221882283687592, 'timestamp': '2025-10-01 04:31:03.634196', 'step': 10497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:03.687148', 'step': 10497, 'epoch': 2} {'type': 'loss', 'content': 0.17295990884304047, 'timestamp': '2025-10-01 04:31:03.689360', 'step': 10498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:03.743127', 'step': 10498, 'epoch': 2} {'type': 'loss', 'content': 0.08965001255273819, 'timestamp': '2025-10-01 04:31:03.745245', 'step': 10499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:31:03.798440', 'step': 10499, 'epoch': 2} {'type': 'loss', 'content': 0.1564803272485733, 'timestamp': '2025-10-01 04:31:03.804133', 'step': 10500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 10500', 'timestamp': '2025-10-01 04:31:04.366597', 'step': 10500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:04.422078', 'step': 10500, 'epoch': 2} {'type': 'loss', 'content': 0.10336336493492126, 'timestamp': '2025-10-01 04:31:04.424262', 'step': 10501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:04.477967', 'step': 10501, 'epoch': 2} {'type': 'loss', 'content': 0.09692395478487015, 'timestamp': '2025-10-01 04:31:04.480208', 'step': 10502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:04.532914', 'step': 10502, 'epoch': 2} {'type': 'loss', 'content': 0.15617498755455017, 'timestamp': '2025-10-01 04:31:04.535040', 'step': 10503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:04.587715', 'step': 10503, 'epoch': 2} {'type': 'loss', 'content': 0.1571044772863388, 'timestamp': '2025-10-01 04:31:04.593929', 'step': 10504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:04.646222', 'step': 10504, 'epoch': 2} {'type': 'loss', 'content': 0.2267216593027115, 'timestamp': '2025-10-01 04:31:04.648431', 'step': 10505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:04.701530', 'step': 10505, 'epoch': 2} {'type': 'loss', 'content': 0.07228084653615952, 'timestamp': '2025-10-01 04:31:04.703685', 'step': 10506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:04.759272', 'step': 10506, 'epoch': 2} {'type': 'loss', 'content': 0.19191351532936096, 'timestamp': '2025-10-01 04:31:04.761474', 'step': 10507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:31:04.813718', 'step': 10507, 'epoch': 2} {'type': 'loss', 'content': 0.09633465856313705, 'timestamp': '2025-10-01 04:31:04.819697', 'step': 10508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:04.871855', 'step': 10508, 'epoch': 2} {'type': 'loss', 'content': 0.0978856086730957, 'timestamp': '2025-10-01 04:31:04.874319', 'step': 10509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:04.931793', 'step': 10509, 'epoch': 2} {'type': 'loss', 'content': 0.06882523745298386, 'timestamp': '2025-10-01 04:31:04.934027', 'step': 10510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:04.987915', 'step': 10510, 'epoch': 2} {'type': 'loss', 'content': 0.1735268235206604, 'timestamp': '2025-10-01 04:31:04.990522', 'step': 10511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:05.043239', 'step': 10511, 'epoch': 2} {'type': 'loss', 'content': 0.11194659024477005, 'timestamp': '2025-10-01 04:31:05.049499', 'step': 10512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:05.106156', 'step': 10512, 'epoch': 2} {'type': 'loss', 'content': 0.1349962055683136, 'timestamp': '2025-10-01 04:31:05.108597', 'step': 10513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:05.161644', 'step': 10513, 'epoch': 2} {'type': 'loss', 'content': 0.138026162981987, 'timestamp': '2025-10-01 04:31:05.163860', 'step': 10514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:05.217823', 'step': 10514, 'epoch': 2} {'type': 'loss', 'content': 0.07845433056354523, 'timestamp': '2025-10-01 04:31:05.219889', 'step': 10515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:05.273107', 'step': 10515, 'epoch': 2} {'type': 'loss', 'content': 0.1766791045665741, 'timestamp': '2025-10-01 04:31:05.278637', 'step': 10516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:05.330844', 'step': 10516, 'epoch': 2} {'type': 'loss', 'content': 0.07223214954137802, 'timestamp': '2025-10-01 04:31:05.332984', 'step': 10517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:05.385845', 'step': 10517, 'epoch': 2} {'type': 'loss', 'content': 0.19474610686302185, 'timestamp': '2025-10-01 04:31:05.388193', 'step': 10518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:05.441119', 'step': 10518, 'epoch': 2} {'type': 'loss', 'content': 0.1131877452135086, 'timestamp': '2025-10-01 04:31:05.443451', 'step': 10519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:05.495892', 'step': 10519, 'epoch': 2} {'type': 'loss', 'content': 0.19799810647964478, 'timestamp': '2025-10-01 04:31:05.501620', 'step': 10520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:05.554377', 'step': 10520, 'epoch': 2} {'type': 'loss', 'content': 0.02791348285973072, 'timestamp': '2025-10-01 04:31:05.556644', 'step': 10521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:05.609163', 'step': 10521, 'epoch': 2} {'type': 'loss', 'content': 0.08158987760543823, 'timestamp': '2025-10-01 04:31:05.611263', 'step': 10522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:05.664205', 'step': 10522, 'epoch': 2} {'type': 'loss', 'content': 0.23058022558689117, 'timestamp': '2025-10-01 04:31:05.666534', 'step': 10523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:05.719980', 'step': 10523, 'epoch': 2} {'type': 'loss', 'content': 0.11933759599924088, 'timestamp': '2025-10-01 04:31:05.726108', 'step': 10524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:05.778683', 'step': 10524, 'epoch': 2} {'type': 'loss', 'content': 0.10745584219694138, 'timestamp': '2025-10-01 04:31:05.780784', 'step': 10525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:05.834412', 'step': 10525, 'epoch': 2} {'type': 'loss', 'content': 0.06254126131534576, 'timestamp': '2025-10-01 04:31:05.836608', 'step': 10526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:05.888839', 'step': 10526, 'epoch': 2} {'type': 'loss', 'content': 0.11683198064565659, 'timestamp': '2025-10-01 04:31:05.891444', 'step': 10527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:05.964688', 'step': 10527, 'epoch': 2} {'type': 'loss', 'content': 0.10397737473249435, 'timestamp': '2025-10-01 04:31:05.970619', 'step': 10528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:06.023211', 'step': 10528, 'epoch': 2} {'type': 'loss', 'content': 0.22386255860328674, 'timestamp': '2025-10-01 04:31:06.035226', 'step': 10529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:06.088473', 'step': 10529, 'epoch': 2} {'type': 'loss', 'content': 0.17119084298610687, 'timestamp': '2025-10-01 04:31:06.090689', 'step': 10530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:06.143436', 'step': 10530, 'epoch': 2} {'type': 'loss', 'content': 0.16523776948451996, 'timestamp': '2025-10-01 04:31:06.146008', 'step': 10531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:06.199555', 'step': 10531, 'epoch': 2} {'type': 'loss', 'content': 0.12752294540405273, 'timestamp': '2025-10-01 04:31:06.205386', 'step': 10532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:06.257757', 'step': 10532, 'epoch': 2} {'type': 'loss', 'content': 0.17073455452919006, 'timestamp': '2025-10-01 04:31:06.260077', 'step': 10533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:06.313192', 'step': 10533, 'epoch': 2} {'type': 'loss', 'content': 0.09220278263092041, 'timestamp': '2025-10-01 04:31:06.315496', 'step': 10534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:06.369446', 'step': 10534, 'epoch': 2} {'type': 'loss', 'content': 0.08944608271121979, 'timestamp': '2025-10-01 04:31:06.371382', 'step': 10535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:06.432827', 'step': 10535, 'epoch': 2} {'type': 'loss', 'content': 0.05297262594103813, 'timestamp': '2025-10-01 04:31:06.438550', 'step': 10536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:06.495444', 'step': 10536, 'epoch': 2} {'type': 'loss', 'content': 0.1456514596939087, 'timestamp': '2025-10-01 04:31:06.497591', 'step': 10537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:06.551698', 'step': 10537, 'epoch': 2} {'type': 'loss', 'content': 0.1066221073269844, 'timestamp': '2025-10-01 04:31:06.562459', 'step': 10538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:06.622669', 'step': 10538, 'epoch': 2} {'type': 'loss', 'content': 0.09466179460287094, 'timestamp': '2025-10-01 04:31:06.624888', 'step': 10539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:06.677982', 'step': 10539, 'epoch': 2} {'type': 'loss', 'content': 0.21231836080551147, 'timestamp': '2025-10-01 04:31:06.684775', 'step': 10540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:06.739816', 'step': 10540, 'epoch': 2} {'type': 'loss', 'content': 0.11769099533557892, 'timestamp': '2025-10-01 04:31:06.741812', 'step': 10541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:06.796210', 'step': 10541, 'epoch': 2} {'type': 'loss', 'content': 0.17756956815719604, 'timestamp': '2025-10-01 04:31:06.798519', 'step': 10542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:06.860065', 'step': 10542, 'epoch': 2} {'type': 'loss', 'content': 0.1356760412454605, 'timestamp': '2025-10-01 04:31:06.863519', 'step': 10543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:06.924602', 'step': 10543, 'epoch': 2} {'type': 'loss', 'content': 0.10418058186769485, 'timestamp': '2025-10-01 04:31:06.932441', 'step': 10544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:31:06.993333', 'step': 10544, 'epoch': 2} {'type': 'loss', 'content': 0.04847396910190582, 'timestamp': '2025-10-01 04:31:06.995661', 'step': 10545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:07.048322', 'step': 10545, 'epoch': 2} {'type': 'loss', 'content': 0.12694334983825684, 'timestamp': '2025-10-01 04:31:07.050775', 'step': 10546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:07.104274', 'step': 10546, 'epoch': 2} {'type': 'loss', 'content': 0.10485256463289261, 'timestamp': '2025-10-01 04:31:07.108712', 'step': 10547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:07.177297', 'step': 10547, 'epoch': 2} {'type': 'loss', 'content': 0.16759967803955078, 'timestamp': '2025-10-01 04:31:07.184256', 'step': 10548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:07.236530', 'step': 10548, 'epoch': 2} {'type': 'loss', 'content': 0.21016688644886017, 'timestamp': '2025-10-01 04:31:07.244325', 'step': 10549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:07.297027', 'step': 10549, 'epoch': 2} {'type': 'loss', 'content': 0.07365287840366364, 'timestamp': '2025-10-01 04:31:07.302957', 'step': 10550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:07.357215', 'step': 10550, 'epoch': 2} {'type': 'loss', 'content': 0.17360082268714905, 'timestamp': '2025-10-01 04:31:07.359505', 'step': 10551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:07.412433', 'step': 10551, 'epoch': 2} {'type': 'loss', 'content': 0.13948602974414825, 'timestamp': '2025-10-01 04:31:07.418144', 'step': 10552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:07.470901', 'step': 10552, 'epoch': 2} {'type': 'loss', 'content': 0.1662186086177826, 'timestamp': '2025-10-01 04:31:07.472926', 'step': 10553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:07.526538', 'step': 10553, 'epoch': 2} {'type': 'loss', 'content': 0.0838940441608429, 'timestamp': '2025-10-01 04:31:07.528973', 'step': 10554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:07.582566', 'step': 10554, 'epoch': 2} {'type': 'loss', 'content': 0.12160437554121017, 'timestamp': '2025-10-01 04:31:07.584687', 'step': 10555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:07.637584', 'step': 10555, 'epoch': 2} {'type': 'loss', 'content': 0.1089228093624115, 'timestamp': '2025-10-01 04:31:07.643167', 'step': 10556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:07.696306', 'step': 10556, 'epoch': 2} {'type': 'loss', 'content': 0.08374608308076859, 'timestamp': '2025-10-01 04:31:07.706887', 'step': 10557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:07.760470', 'step': 10557, 'epoch': 2} {'type': 'loss', 'content': 0.13425517082214355, 'timestamp': '2025-10-01 04:31:07.762692', 'step': 10558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:07.815279', 'step': 10558, 'epoch': 2} {'type': 'loss', 'content': 0.0950637012720108, 'timestamp': '2025-10-01 04:31:07.817263', 'step': 10559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:07.870120', 'step': 10559, 'epoch': 2} {'type': 'loss', 'content': 0.14998342096805573, 'timestamp': '2025-10-01 04:31:07.875629', 'step': 10560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:07.928984', 'step': 10560, 'epoch': 2} {'type': 'loss', 'content': 0.17379380762577057, 'timestamp': '2025-10-01 04:31:07.941236', 'step': 10561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:07.995752', 'step': 10561, 'epoch': 2} {'type': 'loss', 'content': 0.08404118567705154, 'timestamp': '2025-10-01 04:31:07.998044', 'step': 10562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:08.050694', 'step': 10562, 'epoch': 2} {'type': 'loss', 'content': 0.2103257030248642, 'timestamp': '2025-10-01 04:31:08.062304', 'step': 10563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:08.117252', 'step': 10563, 'epoch': 2} {'type': 'loss', 'content': 0.07498184591531754, 'timestamp': '2025-10-01 04:31:08.122729', 'step': 10564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:08.175861', 'step': 10564, 'epoch': 2} {'type': 'loss', 'content': 0.15414412319660187, 'timestamp': '2025-10-01 04:31:08.177962', 'step': 10565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:08.230247', 'step': 10565, 'epoch': 2} {'type': 'loss', 'content': 0.1290387660264969, 'timestamp': '2025-10-01 04:31:08.233154', 'step': 10566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:08.285562', 'step': 10566, 'epoch': 2} {'type': 'loss', 'content': 0.08283292502164841, 'timestamp': '2025-10-01 04:31:08.288020', 'step': 10567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:08.340837', 'step': 10567, 'epoch': 2} {'type': 'loss', 'content': 0.10018081963062286, 'timestamp': '2025-10-01 04:31:08.346441', 'step': 10568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:08.398980', 'step': 10568, 'epoch': 2} {'type': 'loss', 'content': 0.07746803015470505, 'timestamp': '2025-10-01 04:31:08.401004', 'step': 10569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:08.454064', 'step': 10569, 'epoch': 2} {'type': 'loss', 'content': 0.11528750509023666, 'timestamp': '2025-10-01 04:31:08.456229', 'step': 10570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:08.509175', 'step': 10570, 'epoch': 2} {'type': 'loss', 'content': 0.09357175976037979, 'timestamp': '2025-10-01 04:31:08.511382', 'step': 10571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:08.566147', 'step': 10571, 'epoch': 2} {'type': 'loss', 'content': 0.16457022726535797, 'timestamp': '2025-10-01 04:31:08.571796', 'step': 10572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:08.632439', 'step': 10572, 'epoch': 2} {'type': 'loss', 'content': 0.12651261687278748, 'timestamp': '2025-10-01 04:31:08.635089', 'step': 10573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:08.689528', 'step': 10573, 'epoch': 2} {'type': 'loss', 'content': 0.15243595838546753, 'timestamp': '2025-10-01 04:31:08.693070', 'step': 10574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:08.745721', 'step': 10574, 'epoch': 2} {'type': 'loss', 'content': 0.12949524819850922, 'timestamp': '2025-10-01 04:31:08.748014', 'step': 10575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:31:08.801094', 'step': 10575, 'epoch': 2} {'type': 'loss', 'content': 0.22279515862464905, 'timestamp': '2025-10-01 04:31:08.806923', 'step': 10576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:08.860385', 'step': 10576, 'epoch': 2} {'type': 'loss', 'content': 0.15834853053092957, 'timestamp': '2025-10-01 04:31:08.862671', 'step': 10577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:08.915393', 'step': 10577, 'epoch': 2} {'type': 'loss', 'content': 0.07162570208311081, 'timestamp': '2025-10-01 04:31:08.917626', 'step': 10578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:08.971627', 'step': 10578, 'epoch': 2} {'type': 'loss', 'content': 0.13479945063591003, 'timestamp': '2025-10-01 04:31:08.974467', 'step': 10579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:09.036399', 'step': 10579, 'epoch': 2} {'type': 'loss', 'content': 0.05075564980506897, 'timestamp': '2025-10-01 04:31:09.042579', 'step': 10580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:09.094717', 'step': 10580, 'epoch': 2} {'type': 'loss', 'content': 0.10365508496761322, 'timestamp': '2025-10-01 04:31:09.096829', 'step': 10581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:09.149550', 'step': 10581, 'epoch': 2} {'type': 'loss', 'content': 0.13891912996768951, 'timestamp': '2025-10-01 04:31:09.151882', 'step': 10582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:09.204897', 'step': 10582, 'epoch': 2} {'type': 'loss', 'content': 0.0917574092745781, 'timestamp': '2025-10-01 04:31:09.206847', 'step': 10583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:09.259104', 'step': 10583, 'epoch': 2} {'type': 'loss', 'content': 0.07669097930192947, 'timestamp': '2025-10-01 04:31:09.264772', 'step': 10584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:09.316858', 'step': 10584, 'epoch': 2} {'type': 'loss', 'content': 0.11141885817050934, 'timestamp': '2025-10-01 04:31:09.319220', 'step': 10585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:09.372297', 'step': 10585, 'epoch': 2} {'type': 'loss', 'content': 0.13332784175872803, 'timestamp': '2025-10-01 04:31:09.374359', 'step': 10586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:31:09.427478', 'step': 10586, 'epoch': 2} {'type': 'loss', 'content': 0.09406871348619461, 'timestamp': '2025-10-01 04:31:09.429413', 'step': 10587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:31:09.482281', 'step': 10587, 'epoch': 2} {'type': 'loss', 'content': 0.08319500833749771, 'timestamp': '2025-10-01 04:31:09.487814', 'step': 10588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:09.539600', 'step': 10588, 'epoch': 2} {'type': 'loss', 'content': 0.10516011714935303, 'timestamp': '2025-10-01 04:31:09.541903', 'step': 10589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:09.594728', 'step': 10589, 'epoch': 2} {'type': 'loss', 'content': 0.17931729555130005, 'timestamp': '2025-10-01 04:31:09.597069', 'step': 10590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:09.649497', 'step': 10590, 'epoch': 2} {'type': 'loss', 'content': 0.1279616802930832, 'timestamp': '2025-10-01 04:31:09.651778', 'step': 10591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:09.705147', 'step': 10591, 'epoch': 2} {'type': 'loss', 'content': 0.12114546447992325, 'timestamp': '2025-10-01 04:31:09.710772', 'step': 10592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:09.763090', 'step': 10592, 'epoch': 2} {'type': 'loss', 'content': 0.18522168695926666, 'timestamp': '2025-10-01 04:31:09.765060', 'step': 10593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:09.817316', 'step': 10593, 'epoch': 2} {'type': 'loss', 'content': 0.09752453863620758, 'timestamp': '2025-10-01 04:31:09.819279', 'step': 10594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:09.871726', 'step': 10594, 'epoch': 2} {'type': 'loss', 'content': 0.13348132371902466, 'timestamp': '2025-10-01 04:31:09.873658', 'step': 10595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:09.926693', 'step': 10595, 'epoch': 2} {'type': 'loss', 'content': 0.1393376886844635, 'timestamp': '2025-10-01 04:31:09.932144', 'step': 10596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:09.984393', 'step': 10596, 'epoch': 2} {'type': 'loss', 'content': 0.08779745548963547, 'timestamp': '2025-10-01 04:31:09.986371', 'step': 10597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:10.039096', 'step': 10597, 'epoch': 2} {'type': 'loss', 'content': 0.07947792857885361, 'timestamp': '2025-10-01 04:31:10.041390', 'step': 10598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:10.093983', 'step': 10598, 'epoch': 2} {'type': 'loss', 'content': 0.09154775738716125, 'timestamp': '2025-10-01 04:31:10.096326', 'step': 10599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:10.151724', 'step': 10599, 'epoch': 2} {'type': 'loss', 'content': 0.19060161709785461, 'timestamp': '2025-10-01 04:31:10.157265', 'step': 10600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:31:10.210897', 'step': 10600, 'epoch': 2} {'type': 'loss', 'content': 0.16062325239181519, 'timestamp': '2025-10-01 04:31:10.213220', 'step': 10601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:10.266950', 'step': 10601, 'epoch': 2} {'type': 'loss', 'content': 0.08384179323911667, 'timestamp': '2025-10-01 04:31:10.269344', 'step': 10602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:10.324078', 'step': 10602, 'epoch': 2} {'type': 'loss', 'content': 0.2525590658187866, 'timestamp': '2025-10-01 04:31:10.326343', 'step': 10603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:10.380712', 'step': 10603, 'epoch': 2} {'type': 'loss', 'content': 0.10614354908466339, 'timestamp': '2025-10-01 04:31:10.386684', 'step': 10604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:10.439722', 'step': 10604, 'epoch': 2} {'type': 'loss', 'content': 0.14744959771633148, 'timestamp': '2025-10-01 04:31:10.442238', 'step': 10605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:10.506738', 'step': 10605, 'epoch': 2} {'type': 'loss', 'content': 0.18148402869701385, 'timestamp': '2025-10-01 04:31:10.509146', 'step': 10606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:10.564288', 'step': 10606, 'epoch': 2} {'type': 'loss', 'content': 0.15723378956317902, 'timestamp': '2025-10-01 04:31:10.566530', 'step': 10607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:10.627026', 'step': 10607, 'epoch': 2} {'type': 'loss', 'content': 0.21002556383609772, 'timestamp': '2025-10-01 04:31:10.632626', 'step': 10608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:10.686099', 'step': 10608, 'epoch': 2} {'type': 'loss', 'content': 0.15625722706317902, 'timestamp': '2025-10-01 04:31:10.688169', 'step': 10609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:10.741530', 'step': 10609, 'epoch': 2} {'type': 'loss', 'content': 0.17517830431461334, 'timestamp': '2025-10-01 04:31:10.744398', 'step': 10610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:10.801155', 'step': 10610, 'epoch': 2} {'type': 'loss', 'content': 0.26414358615875244, 'timestamp': '2025-10-01 04:31:10.803723', 'step': 10611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:10.857993', 'step': 10611, 'epoch': 2} {'type': 'loss', 'content': 0.11404126137495041, 'timestamp': '2025-10-01 04:31:10.863707', 'step': 10612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:10.916978', 'step': 10612, 'epoch': 2} {'type': 'loss', 'content': 0.11477120965719223, 'timestamp': '2025-10-01 04:31:10.919407', 'step': 10613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:10.974076', 'step': 10613, 'epoch': 2} {'type': 'loss', 'content': 0.11832880973815918, 'timestamp': '2025-10-01 04:31:10.976833', 'step': 10614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:11.039598', 'step': 10614, 'epoch': 2} {'type': 'loss', 'content': 0.168944850564003, 'timestamp': '2025-10-01 04:31:11.042149', 'step': 10615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:11.095996', 'step': 10615, 'epoch': 2} {'type': 'loss', 'content': 0.14848005771636963, 'timestamp': '2025-10-01 04:31:11.101516', 'step': 10616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:11.154702', 'step': 10616, 'epoch': 2} {'type': 'loss', 'content': 0.17990228533744812, 'timestamp': '2025-10-01 04:31:11.157153', 'step': 10617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:11.219248', 'step': 10617, 'epoch': 2} {'type': 'loss', 'content': 0.12170396745204926, 'timestamp': '2025-10-01 04:31:11.222274', 'step': 10618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:11.276140', 'step': 10618, 'epoch': 2} {'type': 'loss', 'content': 0.08578654378652573, 'timestamp': '2025-10-01 04:31:11.278851', 'step': 10619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:11.333005', 'step': 10619, 'epoch': 2} {'type': 'loss', 'content': 0.14805054664611816, 'timestamp': '2025-10-01 04:31:11.338858', 'step': 10620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:31:11.391490', 'step': 10620, 'epoch': 2} {'type': 'loss', 'content': 0.1670241355895996, 'timestamp': '2025-10-01 04:31:11.393596', 'step': 10621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:11.446767', 'step': 10621, 'epoch': 2} {'type': 'loss', 'content': 0.10158468782901764, 'timestamp': '2025-10-01 04:31:11.448904', 'step': 10622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:11.503137', 'step': 10622, 'epoch': 2} {'type': 'loss', 'content': 0.176627516746521, 'timestamp': '2025-10-01 04:31:11.505020', 'step': 10623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:11.557851', 'step': 10623, 'epoch': 2} {'type': 'loss', 'content': 0.17983579635620117, 'timestamp': '2025-10-01 04:31:11.563450', 'step': 10624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:11.615740', 'step': 10624, 'epoch': 2} {'type': 'loss', 'content': 0.23010720312595367, 'timestamp': '2025-10-01 04:31:11.617769', 'step': 10625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:11.670094', 'step': 10625, 'epoch': 2} {'type': 'loss', 'content': 0.14002364873886108, 'timestamp': '2025-10-01 04:31:11.672347', 'step': 10626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:11.725647', 'step': 10626, 'epoch': 2} {'type': 'loss', 'content': 0.19658738374710083, 'timestamp': '2025-10-01 04:31:11.729559', 'step': 10627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:11.791839', 'step': 10627, 'epoch': 2} {'type': 'loss', 'content': 0.2042338252067566, 'timestamp': '2025-10-01 04:31:11.802178', 'step': 10628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:11.854090', 'step': 10628, 'epoch': 2} {'type': 'loss', 'content': 0.08103474229574203, 'timestamp': '2025-10-01 04:31:11.856071', 'step': 10629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:11.908878', 'step': 10629, 'epoch': 2} {'type': 'loss', 'content': 0.11209018528461456, 'timestamp': '2025-10-01 04:31:11.911057', 'step': 10630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:11.965223', 'step': 10630, 'epoch': 2} {'type': 'loss', 'content': 0.09345703572034836, 'timestamp': '2025-10-01 04:31:11.968184', 'step': 10631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:12.021679', 'step': 10631, 'epoch': 2} {'type': 'loss', 'content': 0.18397614359855652, 'timestamp': '2025-10-01 04:31:12.027705', 'step': 10632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:12.084969', 'step': 10632, 'epoch': 2} {'type': 'loss', 'content': 0.1563652902841568, 'timestamp': '2025-10-01 04:31:12.087182', 'step': 10633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:12.140008', 'step': 10633, 'epoch': 2} {'type': 'loss', 'content': 0.14753884077072144, 'timestamp': '2025-10-01 04:31:12.142328', 'step': 10634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:31:12.195688', 'step': 10634, 'epoch': 2} {'type': 'loss', 'content': 0.14058800041675568, 'timestamp': '2025-10-01 04:31:12.207858', 'step': 10635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:12.262293', 'step': 10635, 'epoch': 2} {'type': 'loss', 'content': 0.16196474432945251, 'timestamp': '2025-10-01 04:31:12.268014', 'step': 10636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:12.321097', 'step': 10636, 'epoch': 2} {'type': 'loss', 'content': 0.1402381956577301, 'timestamp': '2025-10-01 04:31:12.323427', 'step': 10637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:12.376387', 'step': 10637, 'epoch': 2} {'type': 'loss', 'content': 0.18292401731014252, 'timestamp': '2025-10-01 04:31:12.378595', 'step': 10638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:12.431910', 'step': 10638, 'epoch': 2} {'type': 'loss', 'content': 0.15798214077949524, 'timestamp': '2025-10-01 04:31:12.434062', 'step': 10639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:12.486842', 'step': 10639, 'epoch': 2} {'type': 'loss', 'content': 0.09933512657880783, 'timestamp': '2025-10-01 04:31:12.493417', 'step': 10640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:12.546953', 'step': 10640, 'epoch': 2} {'type': 'loss', 'content': 0.1275620311498642, 'timestamp': '2025-10-01 04:31:12.549115', 'step': 10641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:12.607792', 'step': 10641, 'epoch': 2} {'type': 'loss', 'content': 0.08749200403690338, 'timestamp': '2025-10-01 04:31:12.610040', 'step': 10642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:12.662994', 'step': 10642, 'epoch': 2} {'type': 'loss', 'content': 0.08584555983543396, 'timestamp': '2025-10-01 04:31:12.665094', 'step': 10643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:12.718425', 'step': 10643, 'epoch': 2} {'type': 'loss', 'content': 0.1179865375161171, 'timestamp': '2025-10-01 04:31:12.724240', 'step': 10644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:12.776727', 'step': 10644, 'epoch': 2} {'type': 'loss', 'content': 0.23818126320838928, 'timestamp': '2025-10-01 04:31:12.778919', 'step': 10645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:12.834706', 'step': 10645, 'epoch': 2} {'type': 'loss', 'content': 0.12086980044841766, 'timestamp': '2025-10-01 04:31:12.836920', 'step': 10646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:12.890667', 'step': 10646, 'epoch': 2} {'type': 'loss', 'content': 0.11269508302211761, 'timestamp': '2025-10-01 04:31:12.892975', 'step': 10647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:12.947831', 'step': 10647, 'epoch': 2} {'type': 'loss', 'content': 0.18623192608356476, 'timestamp': '2025-10-01 04:31:12.953657', 'step': 10648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:13.006918', 'step': 10648, 'epoch': 2} {'type': 'loss', 'content': 0.11678919196128845, 'timestamp': '2025-10-01 04:31:13.021197', 'step': 10649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:13.074506', 'step': 10649, 'epoch': 2} {'type': 'loss', 'content': 0.0967792198061943, 'timestamp': '2025-10-01 04:31:13.076839', 'step': 10650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:13.129451', 'step': 10650, 'epoch': 2} {'type': 'loss', 'content': 0.09299717098474503, 'timestamp': '2025-10-01 04:31:13.131546', 'step': 10651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:13.184075', 'step': 10651, 'epoch': 2} {'type': 'loss', 'content': 0.10808054357767105, 'timestamp': '2025-10-01 04:31:13.206948', 'step': 10652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:13.259199', 'step': 10652, 'epoch': 2} {'type': 'loss', 'content': 0.09233951568603516, 'timestamp': '2025-10-01 04:31:13.261348', 'step': 10653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:31:13.321345', 'step': 10653, 'epoch': 2} {'type': 'loss', 'content': 0.14510734379291534, 'timestamp': '2025-10-01 04:31:13.324216', 'step': 10654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:13.377142', 'step': 10654, 'epoch': 2} {'type': 'loss', 'content': 0.08684414625167847, 'timestamp': '2025-10-01 04:31:13.379420', 'step': 10655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:13.440956', 'step': 10655, 'epoch': 2} {'type': 'loss', 'content': 0.08672148734331131, 'timestamp': '2025-10-01 04:31:13.446844', 'step': 10656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:13.499318', 'step': 10656, 'epoch': 2} {'type': 'loss', 'content': 0.06726489216089249, 'timestamp': '2025-10-01 04:31:13.501194', 'step': 10657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:31:13.554116', 'step': 10657, 'epoch': 2} {'type': 'loss', 'content': 0.06701448559761047, 'timestamp': '2025-10-01 04:31:13.556545', 'step': 10658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:13.610197', 'step': 10658, 'epoch': 2} {'type': 'loss', 'content': 0.15211531519889832, 'timestamp': '2025-10-01 04:31:13.612479', 'step': 10659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:13.666455', 'step': 10659, 'epoch': 2} {'type': 'loss', 'content': 0.09735745936632156, 'timestamp': '2025-10-01 04:31:13.672503', 'step': 10660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:13.731278', 'step': 10660, 'epoch': 2} {'type': 'loss', 'content': 0.16214853525161743, 'timestamp': '2025-10-01 04:31:13.733598', 'step': 10661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:13.796100', 'step': 10661, 'epoch': 2} {'type': 'loss', 'content': 0.17899557948112488, 'timestamp': '2025-10-01 04:31:13.798807', 'step': 10662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:13.853333', 'step': 10662, 'epoch': 2} {'type': 'loss', 'content': 0.2061954140663147, 'timestamp': '2025-10-01 04:31:13.855840', 'step': 10663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:13.908736', 'step': 10663, 'epoch': 2} {'type': 'loss', 'content': 0.18923142552375793, 'timestamp': '2025-10-01 04:31:13.914535', 'step': 10664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:13.967188', 'step': 10664, 'epoch': 2} {'type': 'loss', 'content': 0.12069204449653625, 'timestamp': '2025-10-01 04:31:13.969360', 'step': 10665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:14.022335', 'step': 10665, 'epoch': 2} {'type': 'loss', 'content': 0.11384955793619156, 'timestamp': '2025-10-01 04:31:14.024578', 'step': 10666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:31:14.077968', 'step': 10666, 'epoch': 2} {'type': 'loss', 'content': 0.12487868964672089, 'timestamp': '2025-10-01 04:31:14.080310', 'step': 10667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:14.133571', 'step': 10667, 'epoch': 2} {'type': 'loss', 'content': 0.10038641095161438, 'timestamp': '2025-10-01 04:31:14.139449', 'step': 10668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:14.203595', 'step': 10668, 'epoch': 2} {'type': 'loss', 'content': 0.07298862189054489, 'timestamp': '2025-10-01 04:31:14.205691', 'step': 10669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:14.270286', 'step': 10669, 'epoch': 2} {'type': 'loss', 'content': 0.18935531377792358, 'timestamp': '2025-10-01 04:31:14.272486', 'step': 10670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:14.326078', 'step': 10670, 'epoch': 2} {'type': 'loss', 'content': 0.13957050442695618, 'timestamp': '2025-10-01 04:31:14.328656', 'step': 10671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:14.382776', 'step': 10671, 'epoch': 2} {'type': 'loss', 'content': 0.10113541781902313, 'timestamp': '2025-10-01 04:31:14.398817', 'step': 10672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:14.451237', 'step': 10672, 'epoch': 2} {'type': 'loss', 'content': 0.11977840214967728, 'timestamp': '2025-10-01 04:31:14.453222', 'step': 10673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:14.506198', 'step': 10673, 'epoch': 2} {'type': 'loss', 'content': 0.13414038717746735, 'timestamp': '2025-10-01 04:31:14.509136', 'step': 10674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:14.562275', 'step': 10674, 'epoch': 2} {'type': 'loss', 'content': 0.15779714286327362, 'timestamp': '2025-10-01 04:31:14.564614', 'step': 10675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:14.617754', 'step': 10675, 'epoch': 2} {'type': 'loss', 'content': 0.10750159621238708, 'timestamp': '2025-10-01 04:31:14.623597', 'step': 10676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:14.676394', 'step': 10676, 'epoch': 2} {'type': 'loss', 'content': 0.1869019716978073, 'timestamp': '2025-10-01 04:31:14.679160', 'step': 10677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:14.732394', 'step': 10677, 'epoch': 2} {'type': 'loss', 'content': 0.1187424287199974, 'timestamp': '2025-10-01 04:31:14.734568', 'step': 10678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:14.787762', 'step': 10678, 'epoch': 2} {'type': 'loss', 'content': 0.1456775665283203, 'timestamp': '2025-10-01 04:31:14.790071', 'step': 10679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:14.842598', 'step': 10679, 'epoch': 2} {'type': 'loss', 'content': 0.10371354222297668, 'timestamp': '2025-10-01 04:31:14.848866', 'step': 10680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:14.902197', 'step': 10680, 'epoch': 2} {'type': 'loss', 'content': 0.17466109991073608, 'timestamp': '2025-10-01 04:31:14.904355', 'step': 10681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:14.957876', 'step': 10681, 'epoch': 2} {'type': 'loss', 'content': 0.12337960302829742, 'timestamp': '2025-10-01 04:31:14.959971', 'step': 10682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:15.012359', 'step': 10682, 'epoch': 2} {'type': 'loss', 'content': 0.18650081753730774, 'timestamp': '2025-10-01 04:31:15.015058', 'step': 10683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:31:15.068176', 'step': 10683, 'epoch': 2} {'type': 'loss', 'content': 0.12692509591579437, 'timestamp': '2025-10-01 04:31:15.073717', 'step': 10684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:15.126020', 'step': 10684, 'epoch': 2} {'type': 'loss', 'content': 0.10512293130159378, 'timestamp': '2025-10-01 04:31:15.128166', 'step': 10685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:15.181708', 'step': 10685, 'epoch': 2} {'type': 'loss', 'content': 0.08780630677938461, 'timestamp': '2025-10-01 04:31:15.183994', 'step': 10686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:15.237002', 'step': 10686, 'epoch': 2} {'type': 'loss', 'content': 0.1654774248600006, 'timestamp': '2025-10-01 04:31:15.238975', 'step': 10687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:15.295187', 'step': 10687, 'epoch': 2} {'type': 'loss', 'content': 0.21613909304141998, 'timestamp': '2025-10-01 04:31:15.320060', 'step': 10688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:15.374693', 'step': 10688, 'epoch': 2} {'type': 'loss', 'content': 0.08709351718425751, 'timestamp': '2025-10-01 04:31:15.377726', 'step': 10689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:15.433635', 'step': 10689, 'epoch': 2} {'type': 'loss', 'content': 0.13425861299037933, 'timestamp': '2025-10-01 04:31:15.435999', 'step': 10690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:31:15.491636', 'step': 10690, 'epoch': 2} {'type': 'loss', 'content': 0.1054302230477333, 'timestamp': '2025-10-01 04:31:15.494322', 'step': 10691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:15.547326', 'step': 10691, 'epoch': 2} {'type': 'loss', 'content': 0.1242978572845459, 'timestamp': '2025-10-01 04:31:15.552911', 'step': 10692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:31:15.605284', 'step': 10692, 'epoch': 2} {'type': 'loss', 'content': 0.06500596553087234, 'timestamp': '2025-10-01 04:31:15.607371', 'step': 10693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:15.660706', 'step': 10693, 'epoch': 2} {'type': 'loss', 'content': 0.10466832667589188, 'timestamp': '2025-10-01 04:31:15.662811', 'step': 10694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:31:15.716610', 'step': 10694, 'epoch': 2} {'type': 'loss', 'content': 0.2208327353000641, 'timestamp': '2025-10-01 04:31:15.720223', 'step': 10695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:15.773480', 'step': 10695, 'epoch': 2} {'type': 'loss', 'content': 0.1124085783958435, 'timestamp': '2025-10-01 04:31:15.779155', 'step': 10696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:15.831708', 'step': 10696, 'epoch': 2} {'type': 'loss', 'content': 0.1508416086435318, 'timestamp': '2025-10-01 04:31:15.833888', 'step': 10697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:15.887123', 'step': 10697, 'epoch': 2} {'type': 'loss', 'content': 0.1706397384405136, 'timestamp': '2025-10-01 04:31:15.889133', 'step': 10698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:15.941923', 'step': 10698, 'epoch': 2} {'type': 'loss', 'content': 0.10493861138820648, 'timestamp': '2025-10-01 04:31:15.944080', 'step': 10699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:15.997506', 'step': 10699, 'epoch': 2} {'type': 'loss', 'content': 0.1253451108932495, 'timestamp': '2025-10-01 04:31:16.005070', 'step': 10700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:16.057578', 'step': 10700, 'epoch': 2} {'type': 'loss', 'content': 0.11932828277349472, 'timestamp': '2025-10-01 04:31:16.059819', 'step': 10701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:16.112680', 'step': 10701, 'epoch': 2} {'type': 'loss', 'content': 0.2107783406972885, 'timestamp': '2025-10-01 04:31:16.129833', 'step': 10702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:16.185378', 'step': 10702, 'epoch': 2} {'type': 'loss', 'content': 0.10324577242136002, 'timestamp': '2025-10-01 04:31:16.187347', 'step': 10703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:16.240292', 'step': 10703, 'epoch': 2} {'type': 'loss', 'content': 0.18888618052005768, 'timestamp': '2025-10-01 04:31:16.246126', 'step': 10704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:16.314629', 'step': 10704, 'epoch': 2} {'type': 'loss', 'content': 0.06454597413539886, 'timestamp': '2025-10-01 04:31:16.317165', 'step': 10705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:16.375130', 'step': 10705, 'epoch': 2} {'type': 'loss', 'content': 0.19405832886695862, 'timestamp': '2025-10-01 04:31:16.377242', 'step': 10706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:16.430496', 'step': 10706, 'epoch': 2} {'type': 'loss', 'content': 0.05784307420253754, 'timestamp': '2025-10-01 04:31:16.432546', 'step': 10707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:16.485414', 'step': 10707, 'epoch': 2} {'type': 'loss', 'content': 0.19190049171447754, 'timestamp': '2025-10-01 04:31:16.491329', 'step': 10708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:16.544494', 'step': 10708, 'epoch': 2} {'type': 'loss', 'content': 0.20342622697353363, 'timestamp': '2025-10-01 04:31:16.557097', 'step': 10709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:16.620972', 'step': 10709, 'epoch': 2} {'type': 'loss', 'content': 0.14809384942054749, 'timestamp': '2025-10-01 04:31:16.623206', 'step': 10710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:16.676116', 'step': 10710, 'epoch': 2} {'type': 'loss', 'content': 0.1280042678117752, 'timestamp': '2025-10-01 04:31:16.678371', 'step': 10711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:16.731040', 'step': 10711, 'epoch': 2} {'type': 'loss', 'content': 0.10943004488945007, 'timestamp': '2025-10-01 04:31:16.736870', 'step': 10712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:16.789529', 'step': 10712, 'epoch': 2} {'type': 'loss', 'content': 0.07892156392335892, 'timestamp': '2025-10-01 04:31:16.792452', 'step': 10713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:16.847605', 'step': 10713, 'epoch': 2} {'type': 'loss', 'content': 0.08422545343637466, 'timestamp': '2025-10-01 04:31:16.850009', 'step': 10714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:16.903546', 'step': 10714, 'epoch': 2} {'type': 'loss', 'content': 0.13651108741760254, 'timestamp': '2025-10-01 04:31:16.905933', 'step': 10715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:16.958836', 'step': 10715, 'epoch': 2} {'type': 'loss', 'content': 0.07269826531410217, 'timestamp': '2025-10-01 04:31:16.964581', 'step': 10716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:17.017814', 'step': 10716, 'epoch': 2} {'type': 'loss', 'content': 0.1140759289264679, 'timestamp': '2025-10-01 04:31:17.020123', 'step': 10717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:17.073216', 'step': 10717, 'epoch': 2} {'type': 'loss', 'content': 0.1185142919421196, 'timestamp': '2025-10-01 04:31:17.075346', 'step': 10718, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-01 04:31:30.225522', 'step': 10718, 'epoch': 2} {'type': 'pplx', 'content': 11959.24077226286, 'timestamp': '2025-10-01 04:31:30.228386', 'step': 10718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:30.282653', 'step': 10718, 'epoch': 2} {'type': 'loss', 'content': 0.18903964757919312, 'timestamp': '2025-10-01 04:31:30.284875', 'step': 10719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:30.338855', 'step': 10719, 'epoch': 2} {'type': 'loss', 'content': 0.11506246030330658, 'timestamp': '2025-10-01 04:31:30.346299', 'step': 10720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:30.399760', 'step': 10720, 'epoch': 2} {'type': 'loss', 'content': 0.12748631834983826, 'timestamp': '2025-10-01 04:31:30.401556', 'step': 10721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:30.454784', 'step': 10721, 'epoch': 2} {'type': 'loss', 'content': 0.0658758357167244, 'timestamp': '2025-10-01 04:31:30.456980', 'step': 10722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:30.513565', 'step': 10722, 'epoch': 2} {'type': 'loss', 'content': 0.13570594787597656, 'timestamp': '2025-10-01 04:31:30.516253', 'step': 10723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:31:30.570726', 'step': 10723, 'epoch': 2} {'type': 'loss', 'content': 0.1307535618543625, 'timestamp': '2025-10-01 04:31:30.576574', 'step': 10724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:31:30.629270', 'step': 10724, 'epoch': 2} {'type': 'loss', 'content': 0.11404520273208618, 'timestamp': '2025-10-01 04:31:30.631332', 'step': 10725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:30.685122', 'step': 10725, 'epoch': 2} {'type': 'loss', 'content': 0.24674011766910553, 'timestamp': '2025-10-01 04:31:30.687523', 'step': 10726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:30.741417', 'step': 10726, 'epoch': 2} {'type': 'loss', 'content': 0.1881864219903946, 'timestamp': '2025-10-01 04:31:30.743536', 'step': 10727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:30.796782', 'step': 10727, 'epoch': 2} {'type': 'loss', 'content': 0.11049898713827133, 'timestamp': '2025-10-01 04:31:30.802517', 'step': 10728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:30.857504', 'step': 10728, 'epoch': 2} {'type': 'loss', 'content': 0.14083753526210785, 'timestamp': '2025-10-01 04:31:30.859563', 'step': 10729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:30.911989', 'step': 10729, 'epoch': 2} {'type': 'loss', 'content': 0.1233152523636818, 'timestamp': '2025-10-01 04:31:30.914250', 'step': 10730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:30.967805', 'step': 10730, 'epoch': 2} {'type': 'loss', 'content': 0.10862395912408829, 'timestamp': '2025-10-01 04:31:30.969936', 'step': 10731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:31.023578', 'step': 10731, 'epoch': 2} {'type': 'loss', 'content': 0.04118986800312996, 'timestamp': '2025-10-01 04:31:31.029393', 'step': 10732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:31.082140', 'step': 10732, 'epoch': 2} {'type': 'loss', 'content': 0.15336857736110687, 'timestamp': '2025-10-01 04:31:31.089036', 'step': 10733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:31.142523', 'step': 10733, 'epoch': 2} {'type': 'loss', 'content': 0.10527434200048447, 'timestamp': '2025-10-01 04:31:31.144593', 'step': 10734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:31.197834', 'step': 10734, 'epoch': 2} {'type': 'loss', 'content': 0.052742309868335724, 'timestamp': '2025-10-01 04:31:31.199706', 'step': 10735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:31.252950', 'step': 10735, 'epoch': 2} {'type': 'loss', 'content': 0.13664068281650543, 'timestamp': '2025-10-01 04:31:31.271706', 'step': 10736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:31.324194', 'step': 10736, 'epoch': 2} {'type': 'loss', 'content': 0.14155690371990204, 'timestamp': '2025-10-01 04:31:31.326439', 'step': 10737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:31.379772', 'step': 10737, 'epoch': 2} {'type': 'loss', 'content': 0.08619280904531479, 'timestamp': '2025-10-01 04:31:31.381941', 'step': 10738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:31.443202', 'step': 10738, 'epoch': 2} {'type': 'loss', 'content': 0.20138812065124512, 'timestamp': '2025-10-01 04:31:31.445051', 'step': 10739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:31.498719', 'step': 10739, 'epoch': 2} {'type': 'loss', 'content': 0.11817607283592224, 'timestamp': '2025-10-01 04:31:31.505665', 'step': 10740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:31:31.559867', 'step': 10740, 'epoch': 2} {'type': 'loss', 'content': 0.08526372164487839, 'timestamp': '2025-10-01 04:31:31.562879', 'step': 10741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:31.616227', 'step': 10741, 'epoch': 2} {'type': 'loss', 'content': 0.05857646465301514, 'timestamp': '2025-10-01 04:31:31.618258', 'step': 10742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:31.672500', 'step': 10742, 'epoch': 2} {'type': 'loss', 'content': 0.17432795464992523, 'timestamp': '2025-10-01 04:31:31.674915', 'step': 10743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:31.728407', 'step': 10743, 'epoch': 2} {'type': 'loss', 'content': 0.1304473876953125, 'timestamp': '2025-10-01 04:31:31.739404', 'step': 10744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:31.792364', 'step': 10744, 'epoch': 2} {'type': 'loss', 'content': 0.22142764925956726, 'timestamp': '2025-10-01 04:31:31.800631', 'step': 10745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:31.854340', 'step': 10745, 'epoch': 2} {'type': 'loss', 'content': 0.1114291399717331, 'timestamp': '2025-10-01 04:31:31.856486', 'step': 10746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:31.911200', 'step': 10746, 'epoch': 2} {'type': 'loss', 'content': 0.122312530875206, 'timestamp': '2025-10-01 04:31:31.913503', 'step': 10747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:31.970012', 'step': 10747, 'epoch': 2} {'type': 'loss', 'content': 0.1772264987230301, 'timestamp': '2025-10-01 04:31:31.975435', 'step': 10748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:31:32.031418', 'step': 10748, 'epoch': 2} {'type': 'loss', 'content': 0.10967856645584106, 'timestamp': '2025-10-01 04:31:32.033191', 'step': 10749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:32.087952', 'step': 10749, 'epoch': 2} {'type': 'loss', 'content': 0.2650395929813385, 'timestamp': '2025-10-01 04:31:32.089964', 'step': 10750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:32.143558', 'step': 10750, 'epoch': 2} {'type': 'loss', 'content': 0.1500789225101471, 'timestamp': '2025-10-01 04:31:32.145804', 'step': 10751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:32.199097', 'step': 10751, 'epoch': 2} {'type': 'loss', 'content': 0.02529032900929451, 'timestamp': '2025-10-01 04:31:32.204923', 'step': 10752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:32.265704', 'step': 10752, 'epoch': 2} {'type': 'loss', 'content': 0.1980046033859253, 'timestamp': '2025-10-01 04:31:32.267906', 'step': 10753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:32.327032', 'step': 10753, 'epoch': 2} {'type': 'loss', 'content': 0.16767333447933197, 'timestamp': '2025-10-01 04:31:32.329133', 'step': 10754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:32.387853', 'step': 10754, 'epoch': 2} {'type': 'loss', 'content': 0.10138025134801865, 'timestamp': '2025-10-01 04:31:32.390951', 'step': 10755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:31:32.444890', 'step': 10755, 'epoch': 2} {'type': 'loss', 'content': 0.0921843871474266, 'timestamp': '2025-10-01 04:31:32.450479', 'step': 10756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:32.507598', 'step': 10756, 'epoch': 2} {'type': 'loss', 'content': 0.09748004376888275, 'timestamp': '2025-10-01 04:31:32.509556', 'step': 10757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:32.563809', 'step': 10757, 'epoch': 2} {'type': 'loss', 'content': 0.09194950759410858, 'timestamp': '2025-10-01 04:31:32.566053', 'step': 10758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:32.629507', 'step': 10758, 'epoch': 2} {'type': 'loss', 'content': 0.09787303954362869, 'timestamp': '2025-10-01 04:31:32.631605', 'step': 10759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:32.685291', 'step': 10759, 'epoch': 2} {'type': 'loss', 'content': 0.1240793839097023, 'timestamp': '2025-10-01 04:31:32.691123', 'step': 10760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:32.744608', 'step': 10760, 'epoch': 2} {'type': 'loss', 'content': 0.11866419017314911, 'timestamp': '2025-10-01 04:31:32.746891', 'step': 10761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:32.800880', 'step': 10761, 'epoch': 2} {'type': 'loss', 'content': 0.19413408637046814, 'timestamp': '2025-10-01 04:31:32.802975', 'step': 10762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:32.862184', 'step': 10762, 'epoch': 2} {'type': 'loss', 'content': 0.11175825446844101, 'timestamp': '2025-10-01 04:31:32.864015', 'step': 10763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:32.917912', 'step': 10763, 'epoch': 2} {'type': 'loss', 'content': 0.0925341472029686, 'timestamp': '2025-10-01 04:31:32.923682', 'step': 10764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:32.976917', 'step': 10764, 'epoch': 2} {'type': 'loss', 'content': 0.23686812818050385, 'timestamp': '2025-10-01 04:31:32.978996', 'step': 10765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:33.033193', 'step': 10765, 'epoch': 2} {'type': 'loss', 'content': 0.0824323296546936, 'timestamp': '2025-10-01 04:31:33.035429', 'step': 10766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:33.090733', 'step': 10766, 'epoch': 2} {'type': 'loss', 'content': 0.13127456605434418, 'timestamp': '2025-10-01 04:31:33.093341', 'step': 10767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:33.147629', 'step': 10767, 'epoch': 2} {'type': 'loss', 'content': 0.11734285950660706, 'timestamp': '2025-10-01 04:31:33.153457', 'step': 10768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:33.206813', 'step': 10768, 'epoch': 2} {'type': 'loss', 'content': 0.1257951706647873, 'timestamp': '2025-10-01 04:31:33.209058', 'step': 10769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:33.262498', 'step': 10769, 'epoch': 2} {'type': 'loss', 'content': 0.11470097303390503, 'timestamp': '2025-10-01 04:31:33.264793', 'step': 10770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:33.319740', 'step': 10770, 'epoch': 2} {'type': 'loss', 'content': 0.1568172723054886, 'timestamp': '2025-10-01 04:31:33.322121', 'step': 10771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:33.376493', 'step': 10771, 'epoch': 2} {'type': 'loss', 'content': 0.13432450592517853, 'timestamp': '2025-10-01 04:31:33.382469', 'step': 10772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:33.435117', 'step': 10772, 'epoch': 2} {'type': 'loss', 'content': 0.11984612792730331, 'timestamp': '2025-10-01 04:31:33.437736', 'step': 10773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:33.493005', 'step': 10773, 'epoch': 2} {'type': 'loss', 'content': 0.12685830891132355, 'timestamp': '2025-10-01 04:31:33.495540', 'step': 10774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:33.550620', 'step': 10774, 'epoch': 2} {'type': 'loss', 'content': 0.1307140588760376, 'timestamp': '2025-10-01 04:31:33.562830', 'step': 10775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:33.617323', 'step': 10775, 'epoch': 2} {'type': 'loss', 'content': 0.09959716349840164, 'timestamp': '2025-10-01 04:31:33.623089', 'step': 10776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:33.679055', 'step': 10776, 'epoch': 2} {'type': 'loss', 'content': 0.08231893181800842, 'timestamp': '2025-10-01 04:31:33.682245', 'step': 10777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:33.736818', 'step': 10777, 'epoch': 2} {'type': 'loss', 'content': 0.16531312465667725, 'timestamp': '2025-10-01 04:31:33.740035', 'step': 10778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:33.795960', 'step': 10778, 'epoch': 2} {'type': 'loss', 'content': 0.1670263260602951, 'timestamp': '2025-10-01 04:31:33.798888', 'step': 10779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:33.853584', 'step': 10779, 'epoch': 2} {'type': 'loss', 'content': 0.10721409320831299, 'timestamp': '2025-10-01 04:31:33.859767', 'step': 10780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:33.914155', 'step': 10780, 'epoch': 2} {'type': 'loss', 'content': 0.15660697221755981, 'timestamp': '2025-10-01 04:31:33.917039', 'step': 10781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:33.971889', 'step': 10781, 'epoch': 2} {'type': 'loss', 'content': 0.060226935893297195, 'timestamp': '2025-10-01 04:31:33.974214', 'step': 10782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:31:34.041498', 'step': 10782, 'epoch': 2} {'type': 'loss', 'content': 0.043320685625076294, 'timestamp': '2025-10-01 04:31:34.043954', 'step': 10783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:34.098678', 'step': 10783, 'epoch': 2} {'type': 'loss', 'content': 0.09871653467416763, 'timestamp': '2025-10-01 04:31:34.104863', 'step': 10784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:34.159155', 'step': 10784, 'epoch': 2} {'type': 'loss', 'content': 0.2236206829547882, 'timestamp': '2025-10-01 04:31:34.163093', 'step': 10785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:34.218116', 'step': 10785, 'epoch': 2} {'type': 'loss', 'content': 0.0935015007853508, 'timestamp': '2025-10-01 04:31:34.220917', 'step': 10786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:34.275807', 'step': 10786, 'epoch': 2} {'type': 'loss', 'content': 0.1591988354921341, 'timestamp': '2025-10-01 04:31:34.290554', 'step': 10787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:31:34.345313', 'step': 10787, 'epoch': 2} {'type': 'loss', 'content': 0.1608288288116455, 'timestamp': '2025-10-01 04:31:34.351700', 'step': 10788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:34.410764', 'step': 10788, 'epoch': 2} {'type': 'loss', 'content': 0.19132037460803986, 'timestamp': '2025-10-01 04:31:34.416421', 'step': 10789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:34.472592', 'step': 10789, 'epoch': 2} {'type': 'loss', 'content': 0.10812222957611084, 'timestamp': '2025-10-01 04:31:34.475135', 'step': 10790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:34.535876', 'step': 10790, 'epoch': 2} {'type': 'loss', 'content': 0.07099345326423645, 'timestamp': '2025-10-01 04:31:34.538065', 'step': 10791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:34.592655', 'step': 10791, 'epoch': 2} {'type': 'loss', 'content': 0.17913635075092316, 'timestamp': '2025-10-01 04:31:34.598922', 'step': 10792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:34.654248', 'step': 10792, 'epoch': 2} {'type': 'loss', 'content': 0.07229620218276978, 'timestamp': '2025-10-01 04:31:34.657261', 'step': 10793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:34.712338', 'step': 10793, 'epoch': 2} {'type': 'loss', 'content': 0.14206647872924805, 'timestamp': '2025-10-01 04:31:34.715047', 'step': 10794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:31:34.769117', 'step': 10794, 'epoch': 2} {'type': 'loss', 'content': 0.06866561621427536, 'timestamp': '2025-10-01 04:31:34.771177', 'step': 10795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:34.829765', 'step': 10795, 'epoch': 2} {'type': 'loss', 'content': 0.1543552428483963, 'timestamp': '2025-10-01 04:31:34.851180', 'step': 10796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:34.929356', 'step': 10796, 'epoch': 2} {'type': 'loss', 'content': 0.0848194807767868, 'timestamp': '2025-10-01 04:31:34.931492', 'step': 10797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:34.987401', 'step': 10797, 'epoch': 2} {'type': 'loss', 'content': 0.09435930848121643, 'timestamp': '2025-10-01 04:31:34.990282', 'step': 10798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:35.045140', 'step': 10798, 'epoch': 2} {'type': 'loss', 'content': 0.19795043766498566, 'timestamp': '2025-10-01 04:31:35.047893', 'step': 10799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-01 04:31:35.117254', 'step': 10799, 'epoch': 2} {'type': 'loss', 'content': 0.10474881529808044, 'timestamp': '2025-10-01 04:31:35.130411', 'step': 10800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:35.183970', 'step': 10800, 'epoch': 2} {'type': 'loss', 'content': 0.16465815901756287, 'timestamp': '2025-10-01 04:31:35.187603', 'step': 10801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:35.241275', 'step': 10801, 'epoch': 2} {'type': 'loss', 'content': 0.22802479565143585, 'timestamp': '2025-10-01 04:31:35.243434', 'step': 10802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:35.297125', 'step': 10802, 'epoch': 2} {'type': 'loss', 'content': 0.1786828339099884, 'timestamp': '2025-10-01 04:31:35.299430', 'step': 10803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:35.353636', 'step': 10803, 'epoch': 2} {'type': 'loss', 'content': 0.16859450936317444, 'timestamp': '2025-10-01 04:31:35.359429', 'step': 10804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:35.412608', 'step': 10804, 'epoch': 2} {'type': 'loss', 'content': 0.11233387887477875, 'timestamp': '2025-10-01 04:31:35.414944', 'step': 10805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:35.468650', 'step': 10805, 'epoch': 2} {'type': 'loss', 'content': 0.13589730858802795, 'timestamp': '2025-10-01 04:31:35.470964', 'step': 10806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:35.524619', 'step': 10806, 'epoch': 2} {'type': 'loss', 'content': 0.1367778331041336, 'timestamp': '2025-10-01 04:31:35.527085', 'step': 10807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:35.580802', 'step': 10807, 'epoch': 2} {'type': 'loss', 'content': 0.07528886944055557, 'timestamp': '2025-10-01 04:31:35.586587', 'step': 10808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:35.639127', 'step': 10808, 'epoch': 2} {'type': 'loss', 'content': 0.19830837845802307, 'timestamp': '2025-10-01 04:31:35.641146', 'step': 10809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:35.694977', 'step': 10809, 'epoch': 2} {'type': 'loss', 'content': 0.12032786756753922, 'timestamp': '2025-10-01 04:31:35.697200', 'step': 10810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:35.760983', 'step': 10810, 'epoch': 2} {'type': 'loss', 'content': 0.1052442118525505, 'timestamp': '2025-10-01 04:31:35.763050', 'step': 10811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:35.822021', 'step': 10811, 'epoch': 2} {'type': 'loss', 'content': 0.10042313486337662, 'timestamp': '2025-10-01 04:31:35.828092', 'step': 10812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:31:35.880928', 'step': 10812, 'epoch': 2} {'type': 'loss', 'content': 0.09273727238178253, 'timestamp': '2025-10-01 04:31:35.883269', 'step': 10813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:35.938045', 'step': 10813, 'epoch': 2} {'type': 'loss', 'content': 0.10204988718032837, 'timestamp': '2025-10-01 04:31:35.940686', 'step': 10814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:35.994344', 'step': 10814, 'epoch': 2} {'type': 'loss', 'content': 0.20930537581443787, 'timestamp': '2025-10-01 04:31:35.996571', 'step': 10815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:36.052199', 'step': 10815, 'epoch': 2} {'type': 'loss', 'content': 0.1336333006620407, 'timestamp': '2025-10-01 04:31:36.058301', 'step': 10816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:36.117003', 'step': 10816, 'epoch': 2} {'type': 'loss', 'content': 0.10640636086463928, 'timestamp': '2025-10-01 04:31:36.119013', 'step': 10817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:36.172505', 'step': 10817, 'epoch': 2} {'type': 'loss', 'content': 0.09241263568401337, 'timestamp': '2025-10-01 04:31:36.174786', 'step': 10818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:36.228458', 'step': 10818, 'epoch': 2} {'type': 'loss', 'content': 0.06320738792419434, 'timestamp': '2025-10-01 04:31:36.230663', 'step': 10819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:36.284133', 'step': 10819, 'epoch': 2} {'type': 'loss', 'content': 0.17482595145702362, 'timestamp': '2025-10-01 04:31:36.289895', 'step': 10820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:36.343198', 'step': 10820, 'epoch': 2} {'type': 'loss', 'content': 0.1444309949874878, 'timestamp': '2025-10-01 04:31:36.345376', 'step': 10821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:36.398558', 'step': 10821, 'epoch': 2} {'type': 'loss', 'content': 0.13587932288646698, 'timestamp': '2025-10-01 04:31:36.411435', 'step': 10822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:36.465455', 'step': 10822, 'epoch': 2} {'type': 'loss', 'content': 0.07077141851186752, 'timestamp': '2025-10-01 04:31:36.467414', 'step': 10823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:36.520809', 'step': 10823, 'epoch': 2} {'type': 'loss', 'content': 0.09068147093057632, 'timestamp': '2025-10-01 04:31:36.526653', 'step': 10824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:36.579504', 'step': 10824, 'epoch': 2} {'type': 'loss', 'content': 0.21834595501422882, 'timestamp': '2025-10-01 04:31:36.582361', 'step': 10825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:36.637240', 'step': 10825, 'epoch': 2} {'type': 'loss', 'content': 0.06586863100528717, 'timestamp': '2025-10-01 04:31:36.639649', 'step': 10826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:36.693852', 'step': 10826, 'epoch': 2} {'type': 'loss', 'content': 0.1330915242433548, 'timestamp': '2025-10-01 04:31:36.696075', 'step': 10827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:31:36.750554', 'step': 10827, 'epoch': 2} {'type': 'loss', 'content': 0.11626200377941132, 'timestamp': '2025-10-01 04:31:36.756641', 'step': 10828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:36.819881', 'step': 10828, 'epoch': 2} {'type': 'loss', 'content': 0.14841823279857635, 'timestamp': '2025-10-01 04:31:36.822882', 'step': 10829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:36.875823', 'step': 10829, 'epoch': 2} {'type': 'loss', 'content': 0.08800630271434784, 'timestamp': '2025-10-01 04:31:36.878475', 'step': 10830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:36.936420', 'step': 10830, 'epoch': 2} {'type': 'loss', 'content': 0.1859930008649826, 'timestamp': '2025-10-01 04:31:36.938854', 'step': 10831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:36.992625', 'step': 10831, 'epoch': 2} {'type': 'loss', 'content': 0.10962317883968353, 'timestamp': '2025-10-01 04:31:36.998433', 'step': 10832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:37.060354', 'step': 10832, 'epoch': 2} {'type': 'loss', 'content': 0.10612817853689194, 'timestamp': '2025-10-01 04:31:37.062528', 'step': 10833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:31:37.116059', 'step': 10833, 'epoch': 2} {'type': 'loss', 'content': 0.1302320808172226, 'timestamp': '2025-10-01 04:31:37.118098', 'step': 10834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:37.183549', 'step': 10834, 'epoch': 2} {'type': 'loss', 'content': 0.11547336727380753, 'timestamp': '2025-10-01 04:31:37.185800', 'step': 10835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:37.239336', 'step': 10835, 'epoch': 2} {'type': 'loss', 'content': 0.20414140820503235, 'timestamp': '2025-10-01 04:31:37.245169', 'step': 10836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:37.297685', 'step': 10836, 'epoch': 2} {'type': 'loss', 'content': 0.10808540880680084, 'timestamp': '2025-10-01 04:31:37.299881', 'step': 10837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:37.359831', 'step': 10837, 'epoch': 2} {'type': 'loss', 'content': 0.12358997762203217, 'timestamp': '2025-10-01 04:31:37.362135', 'step': 10838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:37.417035', 'step': 10838, 'epoch': 2} {'type': 'loss', 'content': 0.1207532063126564, 'timestamp': '2025-10-01 04:31:37.420169', 'step': 10839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:37.476182', 'step': 10839, 'epoch': 2} {'type': 'loss', 'content': 0.14973625540733337, 'timestamp': '2025-10-01 04:31:37.482619', 'step': 10840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:37.535734', 'step': 10840, 'epoch': 2} {'type': 'loss', 'content': 0.11960741877555847, 'timestamp': '2025-10-01 04:31:37.538001', 'step': 10841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:37.595260', 'step': 10841, 'epoch': 2} {'type': 'loss', 'content': 0.188544362783432, 'timestamp': '2025-10-01 04:31:37.597627', 'step': 10842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:37.651673', 'step': 10842, 'epoch': 2} {'type': 'loss', 'content': 0.17626424133777618, 'timestamp': '2025-10-01 04:31:37.654129', 'step': 10843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:37.707664', 'step': 10843, 'epoch': 2} {'type': 'loss', 'content': 0.08678051084280014, 'timestamp': '2025-10-01 04:31:37.713648', 'step': 10844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:37.766534', 'step': 10844, 'epoch': 2} {'type': 'loss', 'content': 0.1524505764245987, 'timestamp': '2025-10-01 04:31:37.769338', 'step': 10845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:37.826279', 'step': 10845, 'epoch': 2} {'type': 'loss', 'content': 0.13909213244915009, 'timestamp': '2025-10-01 04:31:37.828798', 'step': 10846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:37.887274', 'step': 10846, 'epoch': 2} {'type': 'loss', 'content': 0.10009371489286423, 'timestamp': '2025-10-01 04:31:37.889291', 'step': 10847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:37.944500', 'step': 10847, 'epoch': 2} {'type': 'loss', 'content': 0.07462801784276962, 'timestamp': '2025-10-01 04:31:37.950498', 'step': 10848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:38.003365', 'step': 10848, 'epoch': 2} {'type': 'loss', 'content': 0.09714610874652863, 'timestamp': '2025-10-01 04:31:38.005667', 'step': 10849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:38.060406', 'step': 10849, 'epoch': 2} {'type': 'loss', 'content': 0.19551575183868408, 'timestamp': '2025-10-01 04:31:38.063107', 'step': 10850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:38.122612', 'step': 10850, 'epoch': 2} {'type': 'loss', 'content': 0.16264568269252777, 'timestamp': '2025-10-01 04:31:38.124880', 'step': 10851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:38.179917', 'step': 10851, 'epoch': 2} {'type': 'loss', 'content': 0.14508573710918427, 'timestamp': '2025-10-01 04:31:38.185662', 'step': 10852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:38.248282', 'step': 10852, 'epoch': 2} {'type': 'loss', 'content': 0.09447579085826874, 'timestamp': '2025-10-01 04:31:38.250409', 'step': 10853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:38.304205', 'step': 10853, 'epoch': 2} {'type': 'loss', 'content': 0.08925195038318634, 'timestamp': '2025-10-01 04:31:38.306434', 'step': 10854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:38.361433', 'step': 10854, 'epoch': 2} {'type': 'loss', 'content': 0.064623162150383, 'timestamp': '2025-10-01 04:31:38.363716', 'step': 10855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:38.423130', 'step': 10855, 'epoch': 2} {'type': 'loss', 'content': 0.11083042621612549, 'timestamp': '2025-10-01 04:31:38.429101', 'step': 10856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:38.490022', 'step': 10856, 'epoch': 2} {'type': 'loss', 'content': 0.14680440723896027, 'timestamp': '2025-10-01 04:31:38.492389', 'step': 10857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:38.546134', 'step': 10857, 'epoch': 2} {'type': 'loss', 'content': 0.16908015310764313, 'timestamp': '2025-10-01 04:31:38.548552', 'step': 10858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:38.602524', 'step': 10858, 'epoch': 2} {'type': 'loss', 'content': 0.17388208210468292, 'timestamp': '2025-10-01 04:31:38.604840', 'step': 10859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:38.658667', 'step': 10859, 'epoch': 2} {'type': 'loss', 'content': 0.07388164848089218, 'timestamp': '2025-10-01 04:31:38.665071', 'step': 10860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:38.720404', 'step': 10860, 'epoch': 2} {'type': 'loss', 'content': 0.1148306280374527, 'timestamp': '2025-10-01 04:31:38.722677', 'step': 10861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:38.775852', 'step': 10861, 'epoch': 2} {'type': 'loss', 'content': 0.10366907715797424, 'timestamp': '2025-10-01 04:31:38.778050', 'step': 10862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:38.831389', 'step': 10862, 'epoch': 2} {'type': 'loss', 'content': 0.23898175358772278, 'timestamp': '2025-10-01 04:31:38.833661', 'step': 10863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:38.898589', 'step': 10863, 'epoch': 2} {'type': 'loss', 'content': 0.14223544299602509, 'timestamp': '2025-10-01 04:31:38.905039', 'step': 10864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:38.958021', 'step': 10864, 'epoch': 2} {'type': 'loss', 'content': 0.10738477855920792, 'timestamp': '2025-10-01 04:31:38.960236', 'step': 10865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:39.014310', 'step': 10865, 'epoch': 2} {'type': 'loss', 'content': 0.08835994452238083, 'timestamp': '2025-10-01 04:31:39.016675', 'step': 10866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:31:39.070654', 'step': 10866, 'epoch': 2} {'type': 'loss', 'content': 0.15191394090652466, 'timestamp': '2025-10-01 04:31:39.072737', 'step': 10867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:39.126933', 'step': 10867, 'epoch': 2} {'type': 'loss', 'content': 0.1647716760635376, 'timestamp': '2025-10-01 04:31:39.133622', 'step': 10868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:39.187813', 'step': 10868, 'epoch': 2} {'type': 'loss', 'content': 0.0889991894364357, 'timestamp': '2025-10-01 04:31:39.190517', 'step': 10869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:39.245764', 'step': 10869, 'epoch': 2} {'type': 'loss', 'content': 0.08333060890436172, 'timestamp': '2025-10-01 04:31:39.248210', 'step': 10870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:39.303482', 'step': 10870, 'epoch': 2} {'type': 'loss', 'content': 0.049890149384737015, 'timestamp': '2025-10-01 04:31:39.306160', 'step': 10871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:39.360533', 'step': 10871, 'epoch': 2} {'type': 'loss', 'content': 0.1949126124382019, 'timestamp': '2025-10-01 04:31:39.366836', 'step': 10872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:39.421584', 'step': 10872, 'epoch': 2} {'type': 'loss', 'content': 0.14937826991081238, 'timestamp': '2025-10-01 04:31:39.424001', 'step': 10873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:39.477571', 'step': 10873, 'epoch': 2} {'type': 'loss', 'content': 0.07686058431863785, 'timestamp': '2025-10-01 04:31:39.479646', 'step': 10874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:39.533026', 'step': 10874, 'epoch': 2} {'type': 'loss', 'content': 0.13036459684371948, 'timestamp': '2025-10-01 04:31:39.535284', 'step': 10875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:39.590452', 'step': 10875, 'epoch': 2} {'type': 'loss', 'content': 0.13612869381904602, 'timestamp': '2025-10-01 04:31:39.596475', 'step': 10876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:39.650935', 'step': 10876, 'epoch': 2} {'type': 'loss', 'content': 0.06590322405099869, 'timestamp': '2025-10-01 04:31:39.655249', 'step': 10877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:39.709269', 'step': 10877, 'epoch': 2} {'type': 'loss', 'content': 0.10034707188606262, 'timestamp': '2025-10-01 04:31:39.715049', 'step': 10878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:39.768849', 'step': 10878, 'epoch': 2} {'type': 'loss', 'content': 0.1298554688692093, 'timestamp': '2025-10-01 04:31:39.771207', 'step': 10879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:39.825811', 'step': 10879, 'epoch': 2} {'type': 'loss', 'content': 0.0710630714893341, 'timestamp': '2025-10-01 04:31:39.831927', 'step': 10880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:39.885185', 'step': 10880, 'epoch': 2} {'type': 'loss', 'content': 0.15336973965168, 'timestamp': '2025-10-01 04:31:39.887602', 'step': 10881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:39.942186', 'step': 10881, 'epoch': 2} {'type': 'loss', 'content': 0.06489768624305725, 'timestamp': '2025-10-01 04:31:39.944593', 'step': 10882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:31:39.999722', 'step': 10882, 'epoch': 2} {'type': 'loss', 'content': 0.1405639499425888, 'timestamp': '2025-10-01 04:31:40.002123', 'step': 10883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:40.068752', 'step': 10883, 'epoch': 2} {'type': 'loss', 'content': 0.09544876962900162, 'timestamp': '2025-10-01 04:31:40.075108', 'step': 10884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:40.129586', 'step': 10884, 'epoch': 2} {'type': 'loss', 'content': 0.06672012805938721, 'timestamp': '2025-10-01 04:31:40.132079', 'step': 10885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:40.187038', 'step': 10885, 'epoch': 2} {'type': 'loss', 'content': 0.07745479792356491, 'timestamp': '2025-10-01 04:31:40.189611', 'step': 10886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:40.245684', 'step': 10886, 'epoch': 2} {'type': 'loss', 'content': 0.23571942746639252, 'timestamp': '2025-10-01 04:31:40.248000', 'step': 10887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:40.303861', 'step': 10887, 'epoch': 2} {'type': 'loss', 'content': 0.24938273429870605, 'timestamp': '2025-10-01 04:31:40.309729', 'step': 10888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:40.362732', 'step': 10888, 'epoch': 2} {'type': 'loss', 'content': 0.12561556696891785, 'timestamp': '2025-10-01 04:31:40.364953', 'step': 10889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:40.418169', 'step': 10889, 'epoch': 2} {'type': 'loss', 'content': 0.19721774756908417, 'timestamp': '2025-10-01 04:31:40.420494', 'step': 10890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:40.474007', 'step': 10890, 'epoch': 2} {'type': 'loss', 'content': 0.12269227206707001, 'timestamp': '2025-10-01 04:31:40.476099', 'step': 10891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:40.529497', 'step': 10891, 'epoch': 2} {'type': 'loss', 'content': 0.11476128548383713, 'timestamp': '2025-10-01 04:31:40.535523', 'step': 10892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:40.589186', 'step': 10892, 'epoch': 2} {'type': 'loss', 'content': 0.06731217354536057, 'timestamp': '2025-10-01 04:31:40.591290', 'step': 10893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:40.644671', 'step': 10893, 'epoch': 2} {'type': 'loss', 'content': 0.15298734605312347, 'timestamp': '2025-10-01 04:31:40.646869', 'step': 10894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:40.699911', 'step': 10894, 'epoch': 2} {'type': 'loss', 'content': 0.09019389003515244, 'timestamp': '2025-10-01 04:31:40.702105', 'step': 10895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:40.755414', 'step': 10895, 'epoch': 2} {'type': 'loss', 'content': 0.23260833323001862, 'timestamp': '2025-10-01 04:31:40.761327', 'step': 10896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:31:40.814005', 'step': 10896, 'epoch': 2} {'type': 'loss', 'content': 0.09219382703304291, 'timestamp': '2025-10-01 04:31:40.816258', 'step': 10897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:40.872581', 'step': 10897, 'epoch': 2} {'type': 'loss', 'content': 0.2099582999944687, 'timestamp': '2025-10-01 04:31:40.874883', 'step': 10898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:40.928439', 'step': 10898, 'epoch': 2} {'type': 'loss', 'content': 0.06985940039157867, 'timestamp': '2025-10-01 04:31:40.930516', 'step': 10899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:40.985267', 'step': 10899, 'epoch': 2} {'type': 'loss', 'content': 0.10290602594614029, 'timestamp': '2025-10-01 04:31:40.991352', 'step': 10900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:41.046890', 'step': 10900, 'epoch': 2} {'type': 'loss', 'content': 0.13417881727218628, 'timestamp': '2025-10-01 04:31:41.049661', 'step': 10901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:41.104669', 'step': 10901, 'epoch': 2} {'type': 'loss', 'content': 0.16140185296535492, 'timestamp': '2025-10-01 04:31:41.109439', 'step': 10902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:41.166435', 'step': 10902, 'epoch': 2} {'type': 'loss', 'content': 0.12963730096817017, 'timestamp': '2025-10-01 04:31:41.168826', 'step': 10903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:31:41.226706', 'step': 10903, 'epoch': 2} {'type': 'loss', 'content': 0.0861378088593483, 'timestamp': '2025-10-01 04:31:41.233079', 'step': 10904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:41.286895', 'step': 10904, 'epoch': 2} {'type': 'loss', 'content': 0.1393449306488037, 'timestamp': '2025-10-01 04:31:41.289170', 'step': 10905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:41.343964', 'step': 10905, 'epoch': 2} {'type': 'loss', 'content': 0.09797324985265732, 'timestamp': '2025-10-01 04:31:41.346541', 'step': 10906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:41.402442', 'step': 10906, 'epoch': 2} {'type': 'loss', 'content': 0.09478309750556946, 'timestamp': '2025-10-01 04:31:41.405086', 'step': 10907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:41.460397', 'step': 10907, 'epoch': 2} {'type': 'loss', 'content': 0.17292965948581696, 'timestamp': '2025-10-01 04:31:41.466771', 'step': 10908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:41.523167', 'step': 10908, 'epoch': 2} {'type': 'loss', 'content': 0.12563300132751465, 'timestamp': '2025-10-01 04:31:41.525540', 'step': 10909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:41.583403', 'step': 10909, 'epoch': 2} {'type': 'loss', 'content': 0.09686053544282913, 'timestamp': '2025-10-01 04:31:41.586000', 'step': 10910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:41.641133', 'step': 10910, 'epoch': 2} {'type': 'loss', 'content': 0.2139747440814972, 'timestamp': '2025-10-01 04:31:41.643690', 'step': 10911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:41.702484', 'step': 10911, 'epoch': 2} {'type': 'loss', 'content': 0.15468032658100128, 'timestamp': '2025-10-01 04:31:41.708578', 'step': 10912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:41.762638', 'step': 10912, 'epoch': 2} {'type': 'loss', 'content': 0.1084107756614685, 'timestamp': '2025-10-01 04:31:41.765279', 'step': 10913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:41.820471', 'step': 10913, 'epoch': 2} {'type': 'loss', 'content': 0.16614824533462524, 'timestamp': '2025-10-01 04:31:41.822985', 'step': 10914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:41.878481', 'step': 10914, 'epoch': 2} {'type': 'loss', 'content': 0.10232872515916824, 'timestamp': '2025-10-01 04:31:41.881067', 'step': 10915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:41.935855', 'step': 10915, 'epoch': 2} {'type': 'loss', 'content': 0.14518098533153534, 'timestamp': '2025-10-01 04:31:41.942098', 'step': 10916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:41.996607', 'step': 10916, 'epoch': 2} {'type': 'loss', 'content': 0.04412691667675972, 'timestamp': '2025-10-01 04:31:41.998702', 'step': 10917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:42.053332', 'step': 10917, 'epoch': 2} {'type': 'loss', 'content': 0.07801594585180283, 'timestamp': '2025-10-01 04:31:42.055964', 'step': 10918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:42.110627', 'step': 10918, 'epoch': 2} {'type': 'loss', 'content': 0.16221776604652405, 'timestamp': '2025-10-01 04:31:42.113274', 'step': 10919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:42.167913', 'step': 10919, 'epoch': 2} {'type': 'loss', 'content': 0.0981011837720871, 'timestamp': '2025-10-01 04:31:42.174288', 'step': 10920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:42.229007', 'step': 10920, 'epoch': 2} {'type': 'loss', 'content': 0.08851341158151627, 'timestamp': '2025-10-01 04:31:42.231754', 'step': 10921, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:42.286956', 'step': 10921, 'epoch': 2} {'type': 'loss', 'content': 0.0666303038597107, 'timestamp': '2025-10-01 04:31:42.289366', 'step': 10922, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:42.344843', 'step': 10922, 'epoch': 2} {'type': 'loss', 'content': 0.1482773870229721, 'timestamp': '2025-10-01 04:31:42.347124', 'step': 10923, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:42.401175', 'step': 10923, 'epoch': 2} {'type': 'loss', 'content': 0.10584195703268051, 'timestamp': '2025-10-01 04:31:42.407112', 'step': 10924, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:42.461034', 'step': 10924, 'epoch': 2} {'type': 'loss', 'content': 0.14562810957431793, 'timestamp': '2025-10-01 04:31:42.463288', 'step': 10925, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:42.517253', 'step': 10925, 'epoch': 2} {'type': 'loss', 'content': 0.09792902320623398, 'timestamp': '2025-10-01 04:31:42.519493', 'step': 10926, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:42.577203', 'step': 10926, 'epoch': 2} {'type': 'loss', 'content': 0.15201884508132935, 'timestamp': '2025-10-01 04:31:42.579508', 'step': 10927, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:42.633894', 'step': 10927, 'epoch': 2} {'type': 'loss', 'content': 0.07171027362346649, 'timestamp': '2025-10-01 04:31:42.640089', 'step': 10928, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:42.692730', 'step': 10928, 'epoch': 2} {'type': 'loss', 'content': 0.12280914932489395, 'timestamp': '2025-10-01 04:31:42.695184', 'step': 10929, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:42.749201', 'step': 10929, 'epoch': 2} {'type': 'loss', 'content': 0.13902851939201355, 'timestamp': '2025-10-01 04:31:42.751358', 'step': 10930, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:42.805151', 'step': 10930, 'epoch': 2} {'type': 'loss', 'content': 0.19494189321994781, 'timestamp': '2025-10-01 04:31:42.807042', 'step': 10931, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:42.862691', 'step': 10931, 'epoch': 2} {'type': 'loss', 'content': 0.10284130275249481, 'timestamp': '2025-10-01 04:31:42.868660', 'step': 10932, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:42.921630', 'step': 10932, 'epoch': 2} {'type': 'loss', 'content': 0.09962703287601471, 'timestamp': '2025-10-01 04:31:42.924671', 'step': 10933, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:42.978021', 'step': 10933, 'epoch': 2} {'type': 'loss', 'content': 0.15987534821033478, 'timestamp': '2025-10-01 04:31:42.979975', 'step': 10934, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:43.033302', 'step': 10934, 'epoch': 2} {'type': 'loss', 'content': 0.1310121864080429, 'timestamp': '2025-10-01 04:31:43.035500', 'step': 10935, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:43.089190', 'step': 10935, 'epoch': 2} {'type': 'loss', 'content': 0.22882330417633057, 'timestamp': '2025-10-01 04:31:43.095089', 'step': 10936, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:43.147806', 'step': 10936, 'epoch': 2} {'type': 'loss', 'content': 0.1041506975889206, 'timestamp': '2025-10-01 04:31:43.149973', 'step': 10937, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:43.206581', 'step': 10937, 'epoch': 2} {'type': 'loss', 'content': 0.11653871089220047, 'timestamp': '2025-10-01 04:31:43.208801', 'step': 10938, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:43.262849', 'step': 10938, 'epoch': 2} {'type': 'loss', 'content': 0.12728312611579895, 'timestamp': '2025-10-01 04:31:43.265353', 'step': 10939, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:43.319666', 'step': 10939, 'epoch': 2} {'type': 'loss', 'content': 0.14124424755573273, 'timestamp': '2025-10-01 04:31:43.325724', 'step': 10940, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:43.379180', 'step': 10940, 'epoch': 2} {'type': 'loss', 'content': 0.12360575795173645, 'timestamp': '2025-10-01 04:31:43.381602', 'step': 10941, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:43.442214', 'step': 10941, 'epoch': 2} {'type': 'loss', 'content': 0.1980578750371933, 'timestamp': '2025-10-01 04:31:43.444293', 'step': 10942, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:43.498819', 'step': 10942, 'epoch': 2} {'type': 'loss', 'content': 0.16000811755657196, 'timestamp': '2025-10-01 04:31:43.501150', 'step': 10943, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:43.560127', 'step': 10943, 'epoch': 2} {'type': 'loss', 'content': 0.1387791931629181, 'timestamp': '2025-10-01 04:31:43.566254', 'step': 10944, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:43.621879', 'step': 10944, 'epoch': 2} {'type': 'loss', 'content': 0.1214638203382492, 'timestamp': '2025-10-01 04:31:43.623996', 'step': 10945, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:43.676919', 'step': 10945, 'epoch': 2} {'type': 'loss', 'content': 0.08334554731845856, 'timestamp': '2025-10-01 04:31:43.679086', 'step': 10946, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:43.732680', 'step': 10946, 'epoch': 2} {'type': 'loss', 'content': 0.13024908304214478, 'timestamp': '2025-10-01 04:31:43.734678', 'step': 10947, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:43.787819', 'step': 10947, 'epoch': 2} {'type': 'loss', 'content': 0.10990943014621735, 'timestamp': '2025-10-01 04:31:43.793739', 'step': 10948, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:43.846693', 'step': 10948, 'epoch': 2} {'type': 'loss', 'content': 0.13150615990161896, 'timestamp': '2025-10-01 04:31:43.849202', 'step': 10949, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:43.907627', 'step': 10949, 'epoch': 2} {'type': 'loss', 'content': 0.10857579112052917, 'timestamp': '2025-10-01 04:31:43.910910', 'step': 10950, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:43.986682', 'step': 10950, 'epoch': 2} {'type': 'loss', 'content': 0.12603500485420227, 'timestamp': '2025-10-01 04:31:43.988698', 'step': 10951, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:44.042345', 'step': 10951, 'epoch': 2} {'type': 'loss', 'content': 0.09661825001239777, 'timestamp': '2025-10-01 04:31:44.048395', 'step': 10952, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:44.101298', 'step': 10952, 'epoch': 2} {'type': 'loss', 'content': 0.06668306887149811, 'timestamp': '2025-10-01 04:31:44.103414', 'step': 10953, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:44.157467', 'step': 10953, 'epoch': 2} {'type': 'loss', 'content': 0.14167813956737518, 'timestamp': '2025-10-01 04:31:44.160085', 'step': 10954, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:44.213625', 'step': 10954, 'epoch': 2} {'type': 'loss', 'content': 0.12286011874675751, 'timestamp': '2025-10-01 04:31:44.215720', 'step': 10955, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:44.269086', 'step': 10955, 'epoch': 2} {'type': 'loss', 'content': 0.3343101739883423, 'timestamp': '2025-10-01 04:31:44.274963', 'step': 10956, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:31:44.328112', 'step': 10956, 'epoch': 2} {'type': 'loss', 'content': 0.0655617043375969, 'timestamp': '2025-10-01 04:31:44.330396', 'step': 10957, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:44.384129', 'step': 10957, 'epoch': 2} {'type': 'loss', 'content': 0.15343117713928223, 'timestamp': '2025-10-01 04:31:44.386314', 'step': 10958, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:44.440435', 'step': 10958, 'epoch': 2} {'type': 'loss', 'content': 0.12419522553682327, 'timestamp': '2025-10-01 04:31:44.442771', 'step': 10959, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:44.496640', 'step': 10959, 'epoch': 2} {'type': 'loss', 'content': 0.12084338068962097, 'timestamp': '2025-10-01 04:31:44.502945', 'step': 10960, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:44.561545', 'step': 10960, 'epoch': 2} {'type': 'loss', 'content': 0.16797006130218506, 'timestamp': '2025-10-01 04:31:44.563863', 'step': 10961, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:44.617256', 'step': 10961, 'epoch': 2} {'type': 'loss', 'content': 0.10454557836055756, 'timestamp': '2025-10-01 04:31:44.619364', 'step': 10962, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:44.672693', 'step': 10962, 'epoch': 2} {'type': 'loss', 'content': 0.09831337630748749, 'timestamp': '2025-10-01 04:31:44.674804', 'step': 10963, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:44.727820', 'step': 10963, 'epoch': 2} {'type': 'loss', 'content': 0.1364412009716034, 'timestamp': '2025-10-01 04:31:44.733750', 'step': 10964, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:44.787214', 'step': 10964, 'epoch': 2} {'type': 'loss', 'content': 0.11480611562728882, 'timestamp': '2025-10-01 04:31:44.789339', 'step': 10965, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:44.842661', 'step': 10965, 'epoch': 2} {'type': 'loss', 'content': 0.09614838659763336, 'timestamp': '2025-10-01 04:31:44.845022', 'step': 10966, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:44.898950', 'step': 10966, 'epoch': 2} {'type': 'loss', 'content': 0.1057753786444664, 'timestamp': '2025-10-01 04:31:44.900947', 'step': 10967, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:44.954936', 'step': 10967, 'epoch': 2} {'type': 'loss', 'content': 0.10432063788175583, 'timestamp': '2025-10-01 04:31:44.960806', 'step': 10968, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:45.015283', 'step': 10968, 'epoch': 2} {'type': 'loss', 'content': 0.10863085091114044, 'timestamp': '2025-10-01 04:31:45.017591', 'step': 10969, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:45.071567', 'step': 10969, 'epoch': 2} {'type': 'loss', 'content': 0.1612054705619812, 'timestamp': '2025-10-01 04:31:45.073786', 'step': 10970, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:45.127625', 'step': 10970, 'epoch': 2} {'type': 'loss', 'content': 0.14468680322170258, 'timestamp': '2025-10-01 04:31:45.130454', 'step': 10971, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:45.184366', 'step': 10971, 'epoch': 2} {'type': 'loss', 'content': 0.14678025245666504, 'timestamp': '2025-10-01 04:31:45.190643', 'step': 10972, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:45.243793', 'step': 10972, 'epoch': 2} {'type': 'loss', 'content': 0.07032155245542526, 'timestamp': '2025-10-01 04:31:45.246014', 'step': 10973, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:45.299853', 'step': 10973, 'epoch': 2} {'type': 'loss', 'content': 0.12420035898685455, 'timestamp': '2025-10-01 04:31:45.302013', 'step': 10974, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:45.356163', 'step': 10974, 'epoch': 2} {'type': 'loss', 'content': 0.23991729319095612, 'timestamp': '2025-10-01 04:31:45.358619', 'step': 10975, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:45.413232', 'step': 10975, 'epoch': 2} {'type': 'loss', 'content': 0.11844678223133087, 'timestamp': '2025-10-01 04:31:45.419095', 'step': 10976, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:45.474084', 'step': 10976, 'epoch': 2} {'type': 'loss', 'content': 0.12177886068820953, 'timestamp': '2025-10-01 04:31:45.476044', 'step': 10977, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:31:45.529659', 'step': 10977, 'epoch': 2} {'type': 'loss', 'content': 0.1312953680753708, 'timestamp': '2025-10-01 04:31:45.531962', 'step': 10978, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:45.589053', 'step': 10978, 'epoch': 2} {'type': 'loss', 'content': 0.12530022859573364, 'timestamp': '2025-10-01 04:31:45.606527', 'step': 10979, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:45.660712', 'step': 10979, 'epoch': 2} {'type': 'loss', 'content': 0.19562169909477234, 'timestamp': '2025-10-01 04:31:45.666616', 'step': 10980, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:45.723156', 'step': 10980, 'epoch': 2} {'type': 'loss', 'content': 0.10350879281759262, 'timestamp': '2025-10-01 04:31:45.725585', 'step': 10981, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:45.780700', 'step': 10981, 'epoch': 2} {'type': 'loss', 'content': 0.12149745225906372, 'timestamp': '2025-10-01 04:31:45.782954', 'step': 10982, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:45.844881', 'step': 10982, 'epoch': 2} {'type': 'loss', 'content': 0.174082413315773, 'timestamp': '2025-10-01 04:31:45.847173', 'step': 10983, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:45.901260', 'step': 10983, 'epoch': 2} {'type': 'loss', 'content': 0.0970577821135521, 'timestamp': '2025-10-01 04:31:45.907406', 'step': 10984, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:31:45.960506', 'step': 10984, 'epoch': 2} {'type': 'loss', 'content': 0.08479952812194824, 'timestamp': '2025-10-01 04:31:45.964229', 'step': 10985, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:46.018206', 'step': 10985, 'epoch': 2} {'type': 'loss', 'content': 0.09143596887588501, 'timestamp': '2025-10-01 04:31:46.021183', 'step': 10986, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:46.074800', 'step': 10986, 'epoch': 2} {'type': 'loss', 'content': 0.228708878159523, 'timestamp': '2025-10-01 04:31:46.077413', 'step': 10987, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:31:46.132124', 'step': 10987, 'epoch': 2} {'type': 'loss', 'content': 0.11907104402780533, 'timestamp': '2025-10-01 04:31:46.138232', 'step': 10988, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:46.191954', 'step': 10988, 'epoch': 2} {'type': 'loss', 'content': 0.11749101430177689, 'timestamp': '2025-10-01 04:31:46.194077', 'step': 10989, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:46.247050', 'step': 10989, 'epoch': 2} {'type': 'loss', 'content': 0.12989793717861176, 'timestamp': '2025-10-01 04:31:46.249852', 'step': 10990, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:31:46.304128', 'step': 10990, 'epoch': 2} {'type': 'loss', 'content': 0.15613862872123718, 'timestamp': '2025-10-01 04:31:46.306333', 'step': 10991, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:46.359862', 'step': 10991, 'epoch': 2} {'type': 'loss', 'content': 0.12697459757328033, 'timestamp': '2025-10-01 04:31:46.365789', 'step': 10992, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:46.419108', 'step': 10992, 'epoch': 2} {'type': 'loss', 'content': 0.17410670220851898, 'timestamp': '2025-10-01 04:31:46.421343', 'step': 10993, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:46.474848', 'step': 10993, 'epoch': 2} {'type': 'loss', 'content': 0.15005770325660706, 'timestamp': '2025-10-01 04:31:46.477060', 'step': 10994, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:46.531023', 'step': 10994, 'epoch': 2} {'type': 'loss', 'content': 0.09343484789133072, 'timestamp': '2025-10-01 04:31:46.533434', 'step': 10995, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:46.586982', 'step': 10995, 'epoch': 2} {'type': 'loss', 'content': 0.13190478086471558, 'timestamp': '2025-10-01 04:31:46.593076', 'step': 10996, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:46.646347', 'step': 10996, 'epoch': 2} {'type': 'loss', 'content': 0.13767269253730774, 'timestamp': '2025-10-01 04:31:46.648603', 'step': 10997, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:46.702547', 'step': 10997, 'epoch': 2} {'type': 'loss', 'content': 0.03663180395960808, 'timestamp': '2025-10-01 04:31:46.704789', 'step': 10998, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:46.758986', 'step': 10998, 'epoch': 2} {'type': 'loss', 'content': 0.22500531375408173, 'timestamp': '2025-10-01 04:31:46.761197', 'step': 10999, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:46.815055', 'step': 10999, 'epoch': 2} {'type': 'loss', 'content': 0.22234675288200378, 'timestamp': '2025-10-01 04:31:46.821232', 'step': 11000, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 11000', 'timestamp': '2025-10-01 04:31:47.182158', 'step': 11000, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:47.240720', 'step': 11000, 'epoch': 2} {'type': 'loss', 'content': 0.18643446266651154, 'timestamp': '2025-10-01 04:31:47.243045', 'step': 11001, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:47.298373', 'step': 11001, 'epoch': 2} {'type': 'loss', 'content': 0.17581981420516968, 'timestamp': '2025-10-01 04:31:47.300639', 'step': 11002, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:31:47.354660', 'step': 11002, 'epoch': 2} {'type': 'loss', 'content': 0.12981803715229034, 'timestamp': '2025-10-01 04:31:47.356999', 'step': 11003, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:47.410731', 'step': 11003, 'epoch': 2} {'type': 'loss', 'content': 0.09912867844104767, 'timestamp': '2025-10-01 04:31:47.417019', 'step': 11004, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:47.470006', 'step': 11004, 'epoch': 2} {'type': 'loss', 'content': 0.14484462141990662, 'timestamp': '2025-10-01 04:31:47.472020', 'step': 11005, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:47.525717', 'step': 11005, 'epoch': 2} {'type': 'loss', 'content': 0.20982006192207336, 'timestamp': '2025-10-01 04:31:47.528074', 'step': 11006, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:31:47.582574', 'step': 11006, 'epoch': 2} {'type': 'loss', 'content': 0.14275281131267548, 'timestamp': '2025-10-01 04:31:47.584792', 'step': 11007, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:47.638728', 'step': 11007, 'epoch': 2} {'type': 'loss', 'content': 0.05445970967411995, 'timestamp': '2025-10-01 04:31:47.644870', 'step': 11008, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:47.697808', 'step': 11008, 'epoch': 2} {'type': 'loss', 'content': 0.058626893907785416, 'timestamp': '2025-10-01 04:31:47.699915', 'step': 11009, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:47.755150', 'step': 11009, 'epoch': 2} {'type': 'loss', 'content': 0.15842413902282715, 'timestamp': '2025-10-01 04:31:47.757342', 'step': 11010, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:47.811700', 'step': 11010, 'epoch': 2} {'type': 'loss', 'content': 0.10636917501688004, 'timestamp': '2025-10-01 04:31:47.813819', 'step': 11011, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:47.870305', 'step': 11011, 'epoch': 2} {'type': 'loss', 'content': 0.09582395106554031, 'timestamp': '2025-10-01 04:31:47.876300', 'step': 11012, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:47.929633', 'step': 11012, 'epoch': 2} {'type': 'loss', 'content': 0.08527875691652298, 'timestamp': '2025-10-01 04:31:47.931845', 'step': 11013, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:47.993933', 'step': 11013, 'epoch': 2} {'type': 'loss', 'content': 0.0812932699918747, 'timestamp': '2025-10-01 04:31:47.996170', 'step': 11014, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:48.051478', 'step': 11014, 'epoch': 2} {'type': 'loss', 'content': 0.1032324880361557, 'timestamp': '2025-10-01 04:31:48.063751', 'step': 11015, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:48.133125', 'step': 11015, 'epoch': 2} {'type': 'loss', 'content': 0.1249362975358963, 'timestamp': '2025-10-01 04:31:48.148404', 'step': 11016, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:48.202433', 'step': 11016, 'epoch': 2} {'type': 'loss', 'content': 0.11789640039205551, 'timestamp': '2025-10-01 04:31:48.204601', 'step': 11017, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:48.258265', 'step': 11017, 'epoch': 2} {'type': 'loss', 'content': 0.14274930953979492, 'timestamp': '2025-10-01 04:31:48.260601', 'step': 11018, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:48.314907', 'step': 11018, 'epoch': 2} {'type': 'loss', 'content': 0.21141254901885986, 'timestamp': '2025-10-01 04:31:48.317122', 'step': 11019, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:48.370854', 'step': 11019, 'epoch': 2} {'type': 'loss', 'content': 0.03597171977162361, 'timestamp': '2025-10-01 04:31:48.377150', 'step': 11020, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:48.430380', 'step': 11020, 'epoch': 2} {'type': 'loss', 'content': 0.20811718702316284, 'timestamp': '2025-10-01 04:31:48.432426', 'step': 11021, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:48.485605', 'step': 11021, 'epoch': 2} {'type': 'loss', 'content': 0.16235332190990448, 'timestamp': '2025-10-01 04:31:48.489759', 'step': 11022, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:31:48.543798', 'step': 11022, 'epoch': 2} {'type': 'loss', 'content': 0.12417162954807281, 'timestamp': '2025-10-01 04:31:48.546257', 'step': 11023, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:48.600978', 'step': 11023, 'epoch': 2} {'type': 'loss', 'content': 0.10804395377635956, 'timestamp': '2025-10-01 04:31:48.607655', 'step': 11024, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:48.662310', 'step': 11024, 'epoch': 2} {'type': 'loss', 'content': 0.09483597427606583, 'timestamp': '2025-10-01 04:31:48.664443', 'step': 11025, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:48.719392', 'step': 11025, 'epoch': 2} {'type': 'loss', 'content': 0.15464964509010315, 'timestamp': '2025-10-01 04:31:48.722305', 'step': 11026, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:48.777449', 'step': 11026, 'epoch': 2} {'type': 'loss', 'content': 0.03803706169128418, 'timestamp': '2025-10-01 04:31:48.780122', 'step': 11027, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:48.835023', 'step': 11027, 'epoch': 2} {'type': 'loss', 'content': 0.07423688471317291, 'timestamp': '2025-10-01 04:31:48.841645', 'step': 11028, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:48.896313', 'step': 11028, 'epoch': 2} {'type': 'loss', 'content': 0.18016695976257324, 'timestamp': '2025-10-01 04:31:48.898923', 'step': 11029, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:48.953961', 'step': 11029, 'epoch': 2} {'type': 'loss', 'content': 0.15461376309394836, 'timestamp': '2025-10-01 04:31:48.956555', 'step': 11030, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:31:49.011835', 'step': 11030, 'epoch': 2} {'type': 'loss', 'content': 0.1380453258752823, 'timestamp': '2025-10-01 04:31:49.014756', 'step': 11031, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:49.069351', 'step': 11031, 'epoch': 2} {'type': 'loss', 'content': 0.12699437141418457, 'timestamp': '2025-10-01 04:31:49.075502', 'step': 11032, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:49.133592', 'step': 11032, 'epoch': 2} {'type': 'loss', 'content': 0.13592709600925446, 'timestamp': '2025-10-01 04:31:49.148603', 'step': 11033, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:49.221838', 'step': 11033, 'epoch': 2} {'type': 'loss', 'content': 0.22659513354301453, 'timestamp': '2025-10-01 04:31:49.233625', 'step': 11034, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:31:49.310367', 'step': 11034, 'epoch': 2} {'type': 'loss', 'content': 0.13892486691474915, 'timestamp': '2025-10-01 04:31:49.326834', 'step': 11035, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:49.409268', 'step': 11035, 'epoch': 2} {'type': 'loss', 'content': 0.10903976857662201, 'timestamp': '2025-10-01 04:31:49.417521', 'step': 11036, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:49.514843', 'step': 11036, 'epoch': 2} {'type': 'loss', 'content': 0.16393113136291504, 'timestamp': '2025-10-01 04:31:49.519672', 'step': 11037, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:49.593628', 'step': 11037, 'epoch': 2} {'type': 'loss', 'content': 0.14614388346672058, 'timestamp': '2025-10-01 04:31:49.612808', 'step': 11038, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:49.693244', 'step': 11038, 'epoch': 2} {'type': 'loss', 'content': 0.08296012133359909, 'timestamp': '2025-10-01 04:31:49.699993', 'step': 11039, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:49.782114', 'step': 11039, 'epoch': 2} {'type': 'loss', 'content': 0.07806359976530075, 'timestamp': '2025-10-01 04:31:49.796464', 'step': 11040, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:49.878274', 'step': 11040, 'epoch': 2} {'type': 'loss', 'content': 0.11979767680168152, 'timestamp': '2025-10-01 04:31:49.890705', 'step': 11041, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:49.988444', 'step': 11041, 'epoch': 2} {'type': 'loss', 'content': 0.10821785032749176, 'timestamp': '2025-10-01 04:31:49.994190', 'step': 11042, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:50.069019', 'step': 11042, 'epoch': 2} {'type': 'loss', 'content': 0.07335562258958817, 'timestamp': '2025-10-01 04:31:50.081816', 'step': 11043, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:50.147098', 'step': 11043, 'epoch': 2} {'type': 'loss', 'content': 0.11868976056575775, 'timestamp': '2025-10-01 04:31:50.163830', 'step': 11044, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:50.237197', 'step': 11044, 'epoch': 2} {'type': 'loss', 'content': 0.18962857127189636, 'timestamp': '2025-10-01 04:31:50.242843', 'step': 11045, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:50.296980', 'step': 11045, 'epoch': 2} {'type': 'loss', 'content': 0.24324959516525269, 'timestamp': '2025-10-01 04:31:50.299218', 'step': 11046, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:50.353141', 'step': 11046, 'epoch': 2} {'type': 'loss', 'content': 0.08199743926525116, 'timestamp': '2025-10-01 04:31:50.356651', 'step': 11047, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:50.429402', 'step': 11047, 'epoch': 2} {'type': 'loss', 'content': 0.09718334674835205, 'timestamp': '2025-10-01 04:31:50.435365', 'step': 11048, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:50.489663', 'step': 11048, 'epoch': 2} {'type': 'loss', 'content': 0.14353694021701813, 'timestamp': '2025-10-01 04:31:50.491918', 'step': 11049, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:50.548272', 'step': 11049, 'epoch': 2} {'type': 'loss', 'content': 0.1609295755624771, 'timestamp': '2025-10-01 04:31:50.550479', 'step': 11050, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:50.605126', 'step': 11050, 'epoch': 2} {'type': 'loss', 'content': 0.07064189016819, 'timestamp': '2025-10-01 04:31:50.607367', 'step': 11051, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:50.661034', 'step': 11051, 'epoch': 2} {'type': 'loss', 'content': 0.0823591947555542, 'timestamp': '2025-10-01 04:31:50.666914', 'step': 11052, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:50.720363', 'step': 11052, 'epoch': 2} {'type': 'loss', 'content': 0.08889176696538925, 'timestamp': '2025-10-01 04:31:50.722543', 'step': 11053, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:50.776015', 'step': 11053, 'epoch': 2} {'type': 'loss', 'content': 0.15174177289009094, 'timestamp': '2025-10-01 04:31:50.778241', 'step': 11054, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:50.832024', 'step': 11054, 'epoch': 2} {'type': 'loss', 'content': 0.11082859337329865, 'timestamp': '2025-10-01 04:31:50.833974', 'step': 11055, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:50.886712', 'step': 11055, 'epoch': 2} {'type': 'loss', 'content': 0.05429238826036453, 'timestamp': '2025-10-01 04:31:50.892486', 'step': 11056, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:50.945940', 'step': 11056, 'epoch': 2} {'type': 'loss', 'content': 0.17270927131175995, 'timestamp': '2025-10-01 04:31:50.948048', 'step': 11057, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:31:51.001750', 'step': 11057, 'epoch': 2} {'type': 'loss', 'content': 0.0690118670463562, 'timestamp': '2025-10-01 04:31:51.003976', 'step': 11058, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:51.057376', 'step': 11058, 'epoch': 2} {'type': 'loss', 'content': 0.11399944126605988, 'timestamp': '2025-10-01 04:31:51.059586', 'step': 11059, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:51.113122', 'step': 11059, 'epoch': 2} {'type': 'loss', 'content': 0.17419536411762238, 'timestamp': '2025-10-01 04:31:51.118845', 'step': 11060, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:51.171997', 'step': 11060, 'epoch': 2} {'type': 'loss', 'content': 0.14837424457073212, 'timestamp': '2025-10-01 04:31:51.174120', 'step': 11061, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:51.233744', 'step': 11061, 'epoch': 2} {'type': 'loss', 'content': 0.06290648877620697, 'timestamp': '2025-10-01 04:31:51.236083', 'step': 11062, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:51.289443', 'step': 11062, 'epoch': 2} {'type': 'loss', 'content': 0.06514745950698853, 'timestamp': '2025-10-01 04:31:51.291667', 'step': 11063, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:51.346467', 'step': 11063, 'epoch': 2} {'type': 'loss', 'content': 0.1393335461616516, 'timestamp': '2025-10-01 04:31:51.354280', 'step': 11064, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:51.407889', 'step': 11064, 'epoch': 2} {'type': 'loss', 'content': 0.06626898050308228, 'timestamp': '2025-10-01 04:31:51.410016', 'step': 11065, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:51.463700', 'step': 11065, 'epoch': 2} {'type': 'loss', 'content': 0.07900454103946686, 'timestamp': '2025-10-01 04:31:51.465915', 'step': 11066, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:51.519762', 'step': 11066, 'epoch': 2} {'type': 'loss', 'content': 0.2510843873023987, 'timestamp': '2025-10-01 04:31:51.522014', 'step': 11067, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:51.575481', 'step': 11067, 'epoch': 2} {'type': 'loss', 'content': 0.20707519352436066, 'timestamp': '2025-10-01 04:31:51.582759', 'step': 11068, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:51.637354', 'step': 11068, 'epoch': 2} {'type': 'loss', 'content': 0.13940449059009552, 'timestamp': '2025-10-01 04:31:51.639604', 'step': 11069, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:51.694179', 'step': 11069, 'epoch': 2} {'type': 'loss', 'content': 0.11311814188957214, 'timestamp': '2025-10-01 04:31:51.696477', 'step': 11070, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:51.753080', 'step': 11070, 'epoch': 2} {'type': 'loss', 'content': 0.2271977812051773, 'timestamp': '2025-10-01 04:31:51.755625', 'step': 11071, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:51.809652', 'step': 11071, 'epoch': 2} {'type': 'loss', 'content': 0.17735856771469116, 'timestamp': '2025-10-01 04:31:51.816088', 'step': 11072, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:31:51.869305', 'step': 11072, 'epoch': 2} {'type': 'loss', 'content': 0.12397951632738113, 'timestamp': '2025-10-01 04:31:51.871491', 'step': 11073, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:51.924885', 'step': 11073, 'epoch': 2} {'type': 'loss', 'content': 0.20897334814071655, 'timestamp': '2025-10-01 04:31:51.927114', 'step': 11074, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:51.980762', 'step': 11074, 'epoch': 2} {'type': 'loss', 'content': 0.07161252200603485, 'timestamp': '2025-10-01 04:31:51.983006', 'step': 11075, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:52.036766', 'step': 11075, 'epoch': 2} {'type': 'loss', 'content': 0.17172124981880188, 'timestamp': '2025-10-01 04:31:52.042799', 'step': 11076, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:31:52.111084', 'step': 11076, 'epoch': 2} {'type': 'loss', 'content': 0.0827641636133194, 'timestamp': '2025-10-01 04:31:52.113273', 'step': 11077, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:52.166137', 'step': 11077, 'epoch': 2} {'type': 'loss', 'content': 0.13134846091270447, 'timestamp': '2025-10-01 04:31:52.168278', 'step': 11078, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:52.222192', 'step': 11078, 'epoch': 2} {'type': 'loss', 'content': 0.13196133077144623, 'timestamp': '2025-10-01 04:31:52.224546', 'step': 11079, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:52.289860', 'step': 11079, 'epoch': 2} {'type': 'loss', 'content': 0.17156369984149933, 'timestamp': '2025-10-01 04:31:52.295670', 'step': 11080, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:52.348880', 'step': 11080, 'epoch': 2} {'type': 'loss', 'content': 0.14470401406288147, 'timestamp': '2025-10-01 04:31:52.351065', 'step': 11081, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:31:52.405318', 'step': 11081, 'epoch': 2} {'type': 'loss', 'content': 0.08807095885276794, 'timestamp': '2025-10-01 04:31:52.407785', 'step': 11082, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:52.460716', 'step': 11082, 'epoch': 2} {'type': 'loss', 'content': 0.11139501631259918, 'timestamp': '2025-10-01 04:31:52.462950', 'step': 11083, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:52.516507', 'step': 11083, 'epoch': 2} {'type': 'loss', 'content': 0.1892416924238205, 'timestamp': '2025-10-01 04:31:52.523264', 'step': 11084, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:52.576899', 'step': 11084, 'epoch': 2} {'type': 'loss', 'content': 0.12264388054609299, 'timestamp': '2025-10-01 04:31:52.579004', 'step': 11085, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:52.633067', 'step': 11085, 'epoch': 2} {'type': 'loss', 'content': 0.05423026531934738, 'timestamp': '2025-10-01 04:31:52.635543', 'step': 11086, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:52.690263', 'step': 11086, 'epoch': 2} {'type': 'loss', 'content': 0.07178225368261337, 'timestamp': '2025-10-01 04:31:52.692811', 'step': 11087, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:52.746479', 'step': 11087, 'epoch': 2} {'type': 'loss', 'content': 0.0717315673828125, 'timestamp': '2025-10-01 04:31:52.752489', 'step': 11088, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:52.812075', 'step': 11088, 'epoch': 2} {'type': 'loss', 'content': 0.18289530277252197, 'timestamp': '2025-10-01 04:31:52.814253', 'step': 11089, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:52.882201', 'step': 11089, 'epoch': 2} {'type': 'loss', 'content': 0.14518868923187256, 'timestamp': '2025-10-01 04:31:52.884411', 'step': 11090, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:52.937674', 'step': 11090, 'epoch': 2} {'type': 'loss', 'content': 0.14342935383319855, 'timestamp': '2025-10-01 04:31:52.939805', 'step': 11091, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:52.993406', 'step': 11091, 'epoch': 2} {'type': 'loss', 'content': 0.14704158902168274, 'timestamp': '2025-10-01 04:31:52.999223', 'step': 11092, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:53.052145', 'step': 11092, 'epoch': 2} {'type': 'loss', 'content': 0.21198375523090363, 'timestamp': '2025-10-01 04:31:53.054167', 'step': 11093, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:53.109197', 'step': 11093, 'epoch': 2} {'type': 'loss', 'content': 0.12524451315402985, 'timestamp': '2025-10-01 04:31:53.111486', 'step': 11094, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:31:53.165054', 'step': 11094, 'epoch': 2} {'type': 'loss', 'content': 0.09301194548606873, 'timestamp': '2025-10-01 04:31:53.167329', 'step': 11095, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:53.220497', 'step': 11095, 'epoch': 2} {'type': 'loss', 'content': 0.15058626234531403, 'timestamp': '2025-10-01 04:31:53.226214', 'step': 11096, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:31:53.279313', 'step': 11096, 'epoch': 2} {'type': 'loss', 'content': 0.1542167365550995, 'timestamp': '2025-10-01 04:31:53.286290', 'step': 11097, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:53.344138', 'step': 11097, 'epoch': 2} {'type': 'loss', 'content': 0.12027892470359802, 'timestamp': '2025-10-01 04:31:53.347787', 'step': 11098, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:53.403524', 'step': 11098, 'epoch': 2} {'type': 'loss', 'content': 0.12204103916883469, 'timestamp': '2025-10-01 04:31:53.405815', 'step': 11099, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:53.459140', 'step': 11099, 'epoch': 2} {'type': 'loss', 'content': 0.1590469479560852, 'timestamp': '2025-10-01 04:31:53.465157', 'step': 11100, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:53.518463', 'step': 11100, 'epoch': 2} {'type': 'loss', 'content': 0.1316957026720047, 'timestamp': '2025-10-01 04:31:53.520756', 'step': 11101, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:53.575362', 'step': 11101, 'epoch': 2} {'type': 'loss', 'content': 0.12053310871124268, 'timestamp': '2025-10-01 04:31:53.577777', 'step': 11102, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:53.632422', 'step': 11102, 'epoch': 2} {'type': 'loss', 'content': 0.11277659982442856, 'timestamp': '2025-10-01 04:31:53.634612', 'step': 11103, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:53.688189', 'step': 11103, 'epoch': 2} {'type': 'loss', 'content': 0.11077148467302322, 'timestamp': '2025-10-01 04:31:53.693857', 'step': 11104, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:53.748747', 'step': 11104, 'epoch': 2} {'type': 'loss', 'content': 0.08629500865936279, 'timestamp': '2025-10-01 04:31:53.751488', 'step': 11105, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:53.817460', 'step': 11105, 'epoch': 2} {'type': 'loss', 'content': 0.10662630200386047, 'timestamp': '2025-10-01 04:31:53.819978', 'step': 11106, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:53.874290', 'step': 11106, 'epoch': 2} {'type': 'loss', 'content': 0.0850505605340004, 'timestamp': '2025-10-01 04:31:53.876438', 'step': 11107, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:53.929930', 'step': 11107, 'epoch': 2} {'type': 'loss', 'content': 0.2001728117465973, 'timestamp': '2025-10-01 04:31:53.936195', 'step': 11108, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:53.990947', 'step': 11108, 'epoch': 2} {'type': 'loss', 'content': 0.12717139720916748, 'timestamp': '2025-10-01 04:31:53.993002', 'step': 11109, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:54.061746', 'step': 11109, 'epoch': 2} {'type': 'loss', 'content': 0.28341829776763916, 'timestamp': '2025-10-01 04:31:54.063910', 'step': 11110, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:54.118836', 'step': 11110, 'epoch': 2} {'type': 'loss', 'content': 0.13372184336185455, 'timestamp': '2025-10-01 04:31:54.121079', 'step': 11111, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-01 04:31:54.182173', 'step': 11111, 'epoch': 2} {'type': 'loss', 'content': 0.10151908546686172, 'timestamp': '2025-10-01 04:31:54.193330', 'step': 11112, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:54.246998', 'step': 11112, 'epoch': 2} {'type': 'loss', 'content': 0.07372604310512543, 'timestamp': '2025-10-01 04:31:54.251016', 'step': 11113, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:54.309608', 'step': 11113, 'epoch': 2} {'type': 'loss', 'content': 0.13428440690040588, 'timestamp': '2025-10-01 04:31:54.314538', 'step': 11114, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:54.376829', 'step': 11114, 'epoch': 2} {'type': 'loss', 'content': 0.0714678093791008, 'timestamp': '2025-10-01 04:31:54.379213', 'step': 11115, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:54.436757', 'step': 11115, 'epoch': 2} {'type': 'loss', 'content': 0.13901981711387634, 'timestamp': '2025-10-01 04:31:54.442837', 'step': 11116, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:54.499226', 'step': 11116, 'epoch': 2} {'type': 'loss', 'content': 0.16719955205917358, 'timestamp': '2025-10-01 04:31:54.501907', 'step': 11117, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:54.558203', 'step': 11117, 'epoch': 2} {'type': 'loss', 'content': 0.07630742341279984, 'timestamp': '2025-10-01 04:31:54.560306', 'step': 11118, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:54.613957', 'step': 11118, 'epoch': 2} {'type': 'loss', 'content': 0.06882598996162415, 'timestamp': '2025-10-01 04:31:54.616367', 'step': 11119, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:54.681917', 'step': 11119, 'epoch': 2} {'type': 'loss', 'content': 0.25281381607055664, 'timestamp': '2025-10-01 04:31:54.687782', 'step': 11120, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:54.740785', 'step': 11120, 'epoch': 2} {'type': 'loss', 'content': 0.15813125669956207, 'timestamp': '2025-10-01 04:31:54.742926', 'step': 11121, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:54.795834', 'step': 11121, 'epoch': 2} {'type': 'loss', 'content': 0.16064560413360596, 'timestamp': '2025-10-01 04:31:54.798371', 'step': 11122, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:54.852752', 'step': 11122, 'epoch': 2} {'type': 'loss', 'content': 0.2405264675617218, 'timestamp': '2025-10-01 04:31:54.855181', 'step': 11123, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:54.908858', 'step': 11123, 'epoch': 2} {'type': 'loss', 'content': 0.12351708859205246, 'timestamp': '2025-10-01 04:31:54.914769', 'step': 11124, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:31:54.968236', 'step': 11124, 'epoch': 2} {'type': 'loss', 'content': 0.10749839246273041, 'timestamp': '2025-10-01 04:31:54.970555', 'step': 11125, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:55.038304', 'step': 11125, 'epoch': 2} {'type': 'loss', 'content': 0.16338221728801727, 'timestamp': '2025-10-01 04:31:55.041002', 'step': 11126, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:55.094612', 'step': 11126, 'epoch': 2} {'type': 'loss', 'content': 0.14479109644889832, 'timestamp': '2025-10-01 04:31:55.096914', 'step': 11127, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:55.153774', 'step': 11127, 'epoch': 2} {'type': 'loss', 'content': 0.10233595222234726, 'timestamp': '2025-10-01 04:31:55.160285', 'step': 11128, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:55.213781', 'step': 11128, 'epoch': 2} {'type': 'loss', 'content': 0.07754366099834442, 'timestamp': '2025-10-01 04:31:55.216343', 'step': 11129, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:55.282424', 'step': 11129, 'epoch': 2} {'type': 'loss', 'content': 0.1296227127313614, 'timestamp': '2025-10-01 04:31:55.284908', 'step': 11130, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:55.354849', 'step': 11130, 'epoch': 2} {'type': 'loss', 'content': 0.11480005085468292, 'timestamp': '2025-10-01 04:31:55.356985', 'step': 11131, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:55.415117', 'step': 11131, 'epoch': 2} {'type': 'loss', 'content': 0.15837444365024567, 'timestamp': '2025-10-01 04:31:55.421093', 'step': 11132, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:55.474114', 'step': 11132, 'epoch': 2} {'type': 'loss', 'content': 0.08881748467683792, 'timestamp': '2025-10-01 04:31:55.477434', 'step': 11133, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:55.531478', 'step': 11133, 'epoch': 2} {'type': 'loss', 'content': 0.13524822890758514, 'timestamp': '2025-10-01 04:31:55.534185', 'step': 11134, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:55.587788', 'step': 11134, 'epoch': 2} {'type': 'loss', 'content': 0.23850315809249878, 'timestamp': '2025-10-01 04:31:55.589978', 'step': 11135, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:55.643529', 'step': 11135, 'epoch': 2} {'type': 'loss', 'content': 0.0506187304854393, 'timestamp': '2025-10-01 04:31:55.649953', 'step': 11136, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:55.703430', 'step': 11136, 'epoch': 2} {'type': 'loss', 'content': 0.1265859603881836, 'timestamp': '2025-10-01 04:31:55.705667', 'step': 11137, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:55.759686', 'step': 11137, 'epoch': 2} {'type': 'loss', 'content': 0.10297562927007675, 'timestamp': '2025-10-01 04:31:55.761802', 'step': 11138, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:55.816502', 'step': 11138, 'epoch': 2} {'type': 'loss', 'content': 0.12763549387454987, 'timestamp': '2025-10-01 04:31:55.818796', 'step': 11139, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:55.872981', 'step': 11139, 'epoch': 2} {'type': 'loss', 'content': 0.17808160185813904, 'timestamp': '2025-10-01 04:31:55.879418', 'step': 11140, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:31:55.934752', 'step': 11140, 'epoch': 2} {'type': 'loss', 'content': 0.1119375005364418, 'timestamp': '2025-10-01 04:31:55.937095', 'step': 11141, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:55.990835', 'step': 11141, 'epoch': 2} {'type': 'loss', 'content': 0.0848168283700943, 'timestamp': '2025-10-01 04:31:55.992931', 'step': 11142, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:56.047214', 'step': 11142, 'epoch': 2} {'type': 'loss', 'content': 0.14355160295963287, 'timestamp': '2025-10-01 04:31:56.049408', 'step': 11143, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:56.103596', 'step': 11143, 'epoch': 2} {'type': 'loss', 'content': 0.11497373878955841, 'timestamp': '2025-10-01 04:31:56.110011', 'step': 11144, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:56.163784', 'step': 11144, 'epoch': 2} {'type': 'loss', 'content': 0.11660069227218628, 'timestamp': '2025-10-01 04:31:56.165790', 'step': 11145, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:56.221324', 'step': 11145, 'epoch': 2} {'type': 'loss', 'content': 0.2938612699508667, 'timestamp': '2025-10-01 04:31:56.223672', 'step': 11146, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:56.278029', 'step': 11146, 'epoch': 2} {'type': 'loss', 'content': 0.14943860471248627, 'timestamp': '2025-10-01 04:31:56.280168', 'step': 11147, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:56.335542', 'step': 11147, 'epoch': 2} {'type': 'loss', 'content': 0.11261875182390213, 'timestamp': '2025-10-01 04:31:56.341907', 'step': 11148, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:56.396005', 'step': 11148, 'epoch': 2} {'type': 'loss', 'content': 0.15426139533519745, 'timestamp': '2025-10-01 04:31:56.398090', 'step': 11149, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:56.468008', 'step': 11149, 'epoch': 2} {'type': 'loss', 'content': 0.1618409901857376, 'timestamp': '2025-10-01 04:31:56.470271', 'step': 11150, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:56.524611', 'step': 11150, 'epoch': 2} {'type': 'loss', 'content': 0.14153140783309937, 'timestamp': '2025-10-01 04:31:56.526873', 'step': 11151, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:56.580800', 'step': 11151, 'epoch': 2} {'type': 'loss', 'content': 0.127765491604805, 'timestamp': '2025-10-01 04:31:56.586900', 'step': 11152, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:56.644581', 'step': 11152, 'epoch': 2} {'type': 'loss', 'content': 0.09968066215515137, 'timestamp': '2025-10-01 04:31:56.646935', 'step': 11153, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:56.700835', 'step': 11153, 'epoch': 2} {'type': 'loss', 'content': 0.1939961165189743, 'timestamp': '2025-10-01 04:31:56.703072', 'step': 11154, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:56.757319', 'step': 11154, 'epoch': 2} {'type': 'loss', 'content': 0.2159961313009262, 'timestamp': '2025-10-01 04:31:56.759671', 'step': 11155, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:56.814545', 'step': 11155, 'epoch': 2} {'type': 'loss', 'content': 0.08908353000879288, 'timestamp': '2025-10-01 04:31:56.820352', 'step': 11156, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:31:56.873554', 'step': 11156, 'epoch': 2} {'type': 'loss', 'content': 0.04466703534126282, 'timestamp': '2025-10-01 04:31:56.876096', 'step': 11157, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:56.931031', 'step': 11157, 'epoch': 2} {'type': 'loss', 'content': 0.13904987275600433, 'timestamp': '2025-10-01 04:31:56.933260', 'step': 11158, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:56.989904', 'step': 11158, 'epoch': 2} {'type': 'loss', 'content': 0.08092255890369415, 'timestamp': '2025-10-01 04:31:56.991884', 'step': 11159, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:57.046964', 'step': 11159, 'epoch': 2} {'type': 'loss', 'content': 0.053015489131212234, 'timestamp': '2025-10-01 04:31:57.053573', 'step': 11160, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:57.109773', 'step': 11160, 'epoch': 2} {'type': 'loss', 'content': 0.18145859241485596, 'timestamp': '2025-10-01 04:31:57.111992', 'step': 11161, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:57.168493', 'step': 11161, 'epoch': 2} {'type': 'loss', 'content': 0.13132140040397644, 'timestamp': '2025-10-01 04:31:57.170776', 'step': 11162, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:57.227175', 'step': 11162, 'epoch': 2} {'type': 'loss', 'content': 0.1347379833459854, 'timestamp': '2025-10-01 04:31:57.229817', 'step': 11163, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:57.287204', 'step': 11163, 'epoch': 2} {'type': 'loss', 'content': 0.2103584110736847, 'timestamp': '2025-10-01 04:31:57.293859', 'step': 11164, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:57.349907', 'step': 11164, 'epoch': 2} {'type': 'loss', 'content': 0.11859028041362762, 'timestamp': '2025-10-01 04:31:57.352503', 'step': 11165, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:57.409102', 'step': 11165, 'epoch': 2} {'type': 'loss', 'content': 0.09577254205942154, 'timestamp': '2025-10-01 04:31:57.411740', 'step': 11166, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:57.469539', 'step': 11166, 'epoch': 2} {'type': 'loss', 'content': 0.21331186592578888, 'timestamp': '2025-10-01 04:31:57.472203', 'step': 11167, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:57.529487', 'step': 11167, 'epoch': 2} {'type': 'loss', 'content': 0.15411753952503204, 'timestamp': '2025-10-01 04:31:57.536415', 'step': 11168, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:57.591723', 'step': 11168, 'epoch': 2} {'type': 'loss', 'content': 0.12992757558822632, 'timestamp': '2025-10-01 04:31:57.594545', 'step': 11169, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:31:57.652235', 'step': 11169, 'epoch': 2} {'type': 'loss', 'content': 0.0904587060213089, 'timestamp': '2025-10-01 04:31:57.654457', 'step': 11170, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:57.718147', 'step': 11170, 'epoch': 2} {'type': 'loss', 'content': 0.1534430980682373, 'timestamp': '2025-10-01 04:31:57.720771', 'step': 11171, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:57.790815', 'step': 11171, 'epoch': 2} {'type': 'loss', 'content': 0.08514516055583954, 'timestamp': '2025-10-01 04:31:57.799074', 'step': 11172, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:57.858738', 'step': 11172, 'epoch': 2} {'type': 'loss', 'content': 0.19818617403507233, 'timestamp': '2025-10-01 04:31:57.861345', 'step': 11173, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:57.923138', 'step': 11173, 'epoch': 2} {'type': 'loss', 'content': 0.16567444801330566, 'timestamp': '2025-10-01 04:31:57.925489', 'step': 11174, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:31:57.980019', 'step': 11174, 'epoch': 2} {'type': 'loss', 'content': 0.20385465025901794, 'timestamp': '2025-10-01 04:31:57.983019', 'step': 11175, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:58.040901', 'step': 11175, 'epoch': 2} {'type': 'loss', 'content': 0.21245478093624115, 'timestamp': '2025-10-01 04:31:58.047439', 'step': 11176, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:58.108546', 'step': 11176, 'epoch': 2} {'type': 'loss', 'content': 0.08019396662712097, 'timestamp': '2025-10-01 04:31:58.110992', 'step': 11177, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:58.165295', 'step': 11177, 'epoch': 2} {'type': 'loss', 'content': 0.11345580965280533, 'timestamp': '2025-10-01 04:31:58.167998', 'step': 11178, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:31:58.235264', 'step': 11178, 'epoch': 2} {'type': 'loss', 'content': 0.1291092187166214, 'timestamp': '2025-10-01 04:31:58.237833', 'step': 11179, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:31:58.292331', 'step': 11179, 'epoch': 2} {'type': 'loss', 'content': 0.11172531545162201, 'timestamp': '2025-10-01 04:31:58.298879', 'step': 11180, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:31:58.352588', 'step': 11180, 'epoch': 2} {'type': 'loss', 'content': 0.1498723179101944, 'timestamp': '2025-10-01 04:31:58.357216', 'step': 11181, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:58.417540', 'step': 11181, 'epoch': 2} {'type': 'loss', 'content': 0.047869909554719925, 'timestamp': '2025-10-01 04:31:58.422677', 'step': 11182, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:58.480241', 'step': 11182, 'epoch': 2} {'type': 'loss', 'content': 0.15487779676914215, 'timestamp': '2025-10-01 04:31:58.482997', 'step': 11183, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:31:58.539731', 'step': 11183, 'epoch': 2} {'type': 'loss', 'content': 0.1712261587381363, 'timestamp': '2025-10-01 04:31:58.545576', 'step': 11184, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-01 04:32:11.620273', 'step': 11184, 'epoch': 2} {'type': 'pplx', 'content': 13575.875136528042, 'timestamp': '2025-10-01 04:32:11.623263', 'step': 11184, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:11.676566', 'step': 11184, 'epoch': 2} {'type': 'loss', 'content': 0.11303070187568665, 'timestamp': '2025-10-01 04:32:11.678781', 'step': 11185, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:11.735689', 'step': 11185, 'epoch': 2} {'type': 'loss', 'content': 0.12674091756343842, 'timestamp': '2025-10-01 04:32:11.737902', 'step': 11186, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:11.791780', 'step': 11186, 'epoch': 2} {'type': 'loss', 'content': 0.15299823880195618, 'timestamp': '2025-10-01 04:32:11.794224', 'step': 11187, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:11.848223', 'step': 11187, 'epoch': 2} {'type': 'loss', 'content': 0.10792466253042221, 'timestamp': '2025-10-01 04:32:11.854474', 'step': 11188, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:11.909469', 'step': 11188, 'epoch': 2} {'type': 'loss', 'content': 0.07630980759859085, 'timestamp': '2025-10-01 04:32:11.911142', 'step': 11189, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:11.965060', 'step': 11189, 'epoch': 2} {'type': 'loss', 'content': 0.15941564738750458, 'timestamp': '2025-10-01 04:32:11.967214', 'step': 11190, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:12.022130', 'step': 11190, 'epoch': 2} {'type': 'loss', 'content': 0.12090342491865158, 'timestamp': '2025-10-01 04:32:12.024503', 'step': 11191, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:12.079072', 'step': 11191, 'epoch': 2} {'type': 'loss', 'content': 0.2288440614938736, 'timestamp': '2025-10-01 04:32:12.085324', 'step': 11192, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:12.139139', 'step': 11192, 'epoch': 2} {'type': 'loss', 'content': 0.16875766217708588, 'timestamp': '2025-10-01 04:32:12.151891', 'step': 11193, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:12.215014', 'step': 11193, 'epoch': 2} {'type': 'loss', 'content': 0.1398450881242752, 'timestamp': '2025-10-01 04:32:12.217396', 'step': 11194, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:32:12.271449', 'step': 11194, 'epoch': 2} {'type': 'loss', 'content': 0.14606507122516632, 'timestamp': '2025-10-01 04:32:12.273843', 'step': 11195, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:12.328928', 'step': 11195, 'epoch': 2} {'type': 'loss', 'content': 0.16680066287517548, 'timestamp': '2025-10-01 04:32:12.335095', 'step': 11196, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:12.388334', 'step': 11196, 'epoch': 2} {'type': 'loss', 'content': 0.12171844393014908, 'timestamp': '2025-10-01 04:32:12.390881', 'step': 11197, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:12.445217', 'step': 11197, 'epoch': 2} {'type': 'loss', 'content': 0.04707057774066925, 'timestamp': '2025-10-01 04:32:12.447423', 'step': 11198, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:12.501418', 'step': 11198, 'epoch': 2} {'type': 'loss', 'content': 0.13252608478069305, 'timestamp': '2025-10-01 04:32:12.504114', 'step': 11199, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:12.558724', 'step': 11199, 'epoch': 2} {'type': 'loss', 'content': 0.13961243629455566, 'timestamp': '2025-10-01 04:32:12.565040', 'step': 11200, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:12.618599', 'step': 11200, 'epoch': 2} {'type': 'loss', 'content': 0.1489555686712265, 'timestamp': '2025-10-01 04:32:12.620937', 'step': 11201, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:12.679105', 'step': 11201, 'epoch': 2} {'type': 'loss', 'content': 0.06383123248815536, 'timestamp': '2025-10-01 04:32:12.681391', 'step': 11202, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:12.735128', 'step': 11202, 'epoch': 2} {'type': 'loss', 'content': 0.14913420379161835, 'timestamp': '2025-10-01 04:32:12.737373', 'step': 11203, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:12.792292', 'step': 11203, 'epoch': 2} {'type': 'loss', 'content': 0.16458411514759064, 'timestamp': '2025-10-01 04:32:12.799030', 'step': 11204, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:12.851842', 'step': 11204, 'epoch': 2} {'type': 'loss', 'content': 0.12613560259342194, 'timestamp': '2025-10-01 04:32:12.854292', 'step': 11205, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:12.919688', 'step': 11205, 'epoch': 2} {'type': 'loss', 'content': 0.10463462769985199, 'timestamp': '2025-10-01 04:32:12.921736', 'step': 11206, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:12.975296', 'step': 11206, 'epoch': 2} {'type': 'loss', 'content': 0.11817643791437149, 'timestamp': '2025-10-01 04:32:12.977539', 'step': 11207, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:13.031327', 'step': 11207, 'epoch': 2} {'type': 'loss', 'content': 0.1334100216627121, 'timestamp': '2025-10-01 04:32:13.037369', 'step': 11208, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:13.089896', 'step': 11208, 'epoch': 2} {'type': 'loss', 'content': 0.1657099425792694, 'timestamp': '2025-10-01 04:32:13.097348', 'step': 11209, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:13.174169', 'step': 11209, 'epoch': 2} {'type': 'loss', 'content': 0.13333535194396973, 'timestamp': '2025-10-01 04:32:13.176470', 'step': 11210, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:13.242592', 'step': 11210, 'epoch': 2} {'type': 'loss', 'content': 0.14686580002307892, 'timestamp': '2025-10-01 04:32:13.248176', 'step': 11211, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:13.304866', 'step': 11211, 'epoch': 2} {'type': 'loss', 'content': 0.07835011929273605, 'timestamp': '2025-10-01 04:32:13.321814', 'step': 11212, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:13.381285', 'step': 11212, 'epoch': 2} {'type': 'loss', 'content': 0.15238972008228302, 'timestamp': '2025-10-01 04:32:13.388442', 'step': 11213, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:13.453540', 'step': 11213, 'epoch': 2} {'type': 'loss', 'content': 0.09774913638830185, 'timestamp': '2025-10-01 04:32:13.457286', 'step': 11214, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:13.511037', 'step': 11214, 'epoch': 2} {'type': 'loss', 'content': 0.11696717888116837, 'timestamp': '2025-10-01 04:32:13.515561', 'step': 11215, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:13.572803', 'step': 11215, 'epoch': 2} {'type': 'loss', 'content': 0.13220742344856262, 'timestamp': '2025-10-01 04:32:13.578554', 'step': 11216, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:13.689382', 'step': 11216, 'epoch': 2} {'type': 'loss', 'content': 0.11597193777561188, 'timestamp': '2025-10-01 04:32:13.692004', 'step': 11217, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:32:13.755301', 'step': 11217, 'epoch': 2} {'type': 'loss', 'content': 0.23177039623260498, 'timestamp': '2025-10-01 04:32:13.757789', 'step': 11218, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:13.821745', 'step': 11218, 'epoch': 2} {'type': 'loss', 'content': 0.06748715788125992, 'timestamp': '2025-10-01 04:32:13.829345', 'step': 11219, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:13.885015', 'step': 11219, 'epoch': 2} {'type': 'loss', 'content': 0.08211800456047058, 'timestamp': '2025-10-01 04:32:13.891226', 'step': 11220, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:13.957117', 'step': 11220, 'epoch': 2} {'type': 'loss', 'content': 0.07961858063936234, 'timestamp': '2025-10-01 04:32:13.962218', 'step': 11221, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:32:14.033616', 'step': 11221, 'epoch': 2} {'type': 'loss', 'content': 0.10632903128862381, 'timestamp': '2025-10-01 04:32:14.037231', 'step': 11222, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:14.096894', 'step': 11222, 'epoch': 2} {'type': 'loss', 'content': 0.09385423362255096, 'timestamp': '2025-10-01 04:32:14.110361', 'step': 11223, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:14.175544', 'step': 11223, 'epoch': 2} {'type': 'loss', 'content': 0.08265866339206696, 'timestamp': '2025-10-01 04:32:14.181730', 'step': 11224, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:32:14.242431', 'step': 11224, 'epoch': 2} {'type': 'loss', 'content': 0.04199036583304405, 'timestamp': '2025-10-01 04:32:14.251156', 'step': 11225, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:14.319422', 'step': 11225, 'epoch': 2} {'type': 'loss', 'content': 0.060934267938137054, 'timestamp': '2025-10-01 04:32:14.321556', 'step': 11226, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:14.392066', 'step': 11226, 'epoch': 2} {'type': 'loss', 'content': 0.08436906337738037, 'timestamp': '2025-10-01 04:32:14.394217', 'step': 11227, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:14.448687', 'step': 11227, 'epoch': 2} {'type': 'loss', 'content': 0.16066177189350128, 'timestamp': '2025-10-01 04:32:14.454650', 'step': 11228, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:14.507873', 'step': 11228, 'epoch': 2} {'type': 'loss', 'content': 0.12440638989210129, 'timestamp': '2025-10-01 04:32:14.510695', 'step': 11229, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:32:14.566420', 'step': 11229, 'epoch': 2} {'type': 'loss', 'content': 0.10620122402906418, 'timestamp': '2025-10-01 04:32:14.568919', 'step': 11230, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:14.631022', 'step': 11230, 'epoch': 2} {'type': 'loss', 'content': 0.07082600146532059, 'timestamp': '2025-10-01 04:32:14.640508', 'step': 11231, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:14.696720', 'step': 11231, 'epoch': 2} {'type': 'loss', 'content': 0.10555337369441986, 'timestamp': '2025-10-01 04:32:14.702791', 'step': 11232, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:14.756359', 'step': 11232, 'epoch': 2} {'type': 'loss', 'content': 0.13033418357372284, 'timestamp': '2025-10-01 04:32:14.758711', 'step': 11233, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:14.812835', 'step': 11233, 'epoch': 2} {'type': 'loss', 'content': 0.16877533495426178, 'timestamp': '2025-10-01 04:32:14.815310', 'step': 11234, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:14.871241', 'step': 11234, 'epoch': 2} {'type': 'loss', 'content': 0.030009523034095764, 'timestamp': '2025-10-01 04:32:14.873364', 'step': 11235, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:14.926971', 'step': 11235, 'epoch': 2} {'type': 'loss', 'content': 0.043130163103342056, 'timestamp': '2025-10-01 04:32:14.932810', 'step': 11236, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:14.985762', 'step': 11236, 'epoch': 2} {'type': 'loss', 'content': 0.09583190828561783, 'timestamp': '2025-10-01 04:32:14.987800', 'step': 11237, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:15.041612', 'step': 11237, 'epoch': 2} {'type': 'loss', 'content': 0.1139984279870987, 'timestamp': '2025-10-01 04:32:15.043890', 'step': 11238, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:15.097205', 'step': 11238, 'epoch': 2} {'type': 'loss', 'content': 0.1034805104136467, 'timestamp': '2025-10-01 04:32:15.101571', 'step': 11239, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:15.160578', 'step': 11239, 'epoch': 2} {'type': 'loss', 'content': 0.09693461656570435, 'timestamp': '2025-10-01 04:32:15.166206', 'step': 11240, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:15.218913', 'step': 11240, 'epoch': 2} {'type': 'loss', 'content': 0.11364968866109848, 'timestamp': '2025-10-01 04:32:15.223730', 'step': 11241, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:15.285160', 'step': 11241, 'epoch': 2} {'type': 'loss', 'content': 0.07479482144117355, 'timestamp': '2025-10-01 04:32:15.292837', 'step': 11242, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:15.352819', 'step': 11242, 'epoch': 2} {'type': 'loss', 'content': 0.12191615253686905, 'timestamp': '2025-10-01 04:32:15.354773', 'step': 11243, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:15.408409', 'step': 11243, 'epoch': 2} {'type': 'loss', 'content': 0.10787883400917053, 'timestamp': '2025-10-01 04:32:15.414060', 'step': 11244, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:15.469819', 'step': 11244, 'epoch': 2} {'type': 'loss', 'content': 0.17090241611003876, 'timestamp': '2025-10-01 04:32:15.471896', 'step': 11245, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:15.525967', 'step': 11245, 'epoch': 2} {'type': 'loss', 'content': 0.06462215632200241, 'timestamp': '2025-10-01 04:32:15.528007', 'step': 11246, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:15.581761', 'step': 11246, 'epoch': 2} {'type': 'loss', 'content': 0.15413449704647064, 'timestamp': '2025-10-01 04:32:15.583835', 'step': 11247, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:32:15.637608', 'step': 11247, 'epoch': 2} {'type': 'loss', 'content': 0.15215180814266205, 'timestamp': '2025-10-01 04:32:15.643521', 'step': 11248, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:32:15.696285', 'step': 11248, 'epoch': 2} {'type': 'loss', 'content': 0.16900524497032166, 'timestamp': '2025-10-01 04:32:15.698162', 'step': 11249, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:15.751575', 'step': 11249, 'epoch': 2} {'type': 'loss', 'content': 0.10668091475963593, 'timestamp': '2025-10-01 04:32:15.753478', 'step': 11250, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:15.806933', 'step': 11250, 'epoch': 2} {'type': 'loss', 'content': 0.14210113883018494, 'timestamp': '2025-10-01 04:32:15.809055', 'step': 11251, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:15.862075', 'step': 11251, 'epoch': 2} {'type': 'loss', 'content': 0.07729709893465042, 'timestamp': '2025-10-01 04:32:15.867814', 'step': 11252, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:15.920314', 'step': 11252, 'epoch': 2} {'type': 'loss', 'content': 0.18738432228565216, 'timestamp': '2025-10-01 04:32:15.922602', 'step': 11253, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:15.976441', 'step': 11253, 'epoch': 2} {'type': 'loss', 'content': 0.1606062650680542, 'timestamp': '2025-10-01 04:32:15.978555', 'step': 11254, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:16.032148', 'step': 11254, 'epoch': 2} {'type': 'loss', 'content': 0.14387716352939606, 'timestamp': '2025-10-01 04:32:16.034103', 'step': 11255, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:16.087564', 'step': 11255, 'epoch': 2} {'type': 'loss', 'content': 0.1322953999042511, 'timestamp': '2025-10-01 04:32:16.093076', 'step': 11256, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:16.145847', 'step': 11256, 'epoch': 2} {'type': 'loss', 'content': 0.03976206108927727, 'timestamp': '2025-10-01 04:32:16.147834', 'step': 11257, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:16.201816', 'step': 11257, 'epoch': 2} {'type': 'loss', 'content': 0.07490488141775131, 'timestamp': '2025-10-01 04:32:16.203953', 'step': 11258, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:16.260743', 'step': 11258, 'epoch': 2} {'type': 'loss', 'content': 0.20245546102523804, 'timestamp': '2025-10-01 04:32:16.262851', 'step': 11259, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:16.316645', 'step': 11259, 'epoch': 2} {'type': 'loss', 'content': 0.06349371373653412, 'timestamp': '2025-10-01 04:32:16.322686', 'step': 11260, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:16.375521', 'step': 11260, 'epoch': 2} {'type': 'loss', 'content': 0.06633343547582626, 'timestamp': '2025-10-01 04:32:16.377647', 'step': 11261, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:16.430614', 'step': 11261, 'epoch': 2} {'type': 'loss', 'content': 0.06382111459970474, 'timestamp': '2025-10-01 04:32:16.432651', 'step': 11262, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:16.485943', 'step': 11262, 'epoch': 2} {'type': 'loss', 'content': 0.07965800166130066, 'timestamp': '2025-10-01 04:32:16.487828', 'step': 11263, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:16.540981', 'step': 11263, 'epoch': 2} {'type': 'loss', 'content': 0.14591337740421295, 'timestamp': '2025-10-01 04:32:16.546894', 'step': 11264, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:16.600445', 'step': 11264, 'epoch': 2} {'type': 'loss', 'content': 0.13312074542045593, 'timestamp': '2025-10-01 04:32:16.602370', 'step': 11265, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:16.657160', 'step': 11265, 'epoch': 2} {'type': 'loss', 'content': 0.0732053816318512, 'timestamp': '2025-10-01 04:32:16.659116', 'step': 11266, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:16.712039', 'step': 11266, 'epoch': 2} {'type': 'loss', 'content': 0.17375072836875916, 'timestamp': '2025-10-01 04:32:16.714227', 'step': 11267, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:16.772609', 'step': 11267, 'epoch': 2} {'type': 'loss', 'content': 0.044729046523571014, 'timestamp': '2025-10-01 04:32:16.778468', 'step': 11268, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:16.831418', 'step': 11268, 'epoch': 2} {'type': 'loss', 'content': 0.16093534231185913, 'timestamp': '2025-10-01 04:32:16.833911', 'step': 11269, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:16.887411', 'step': 11269, 'epoch': 2} {'type': 'loss', 'content': 0.13202834129333496, 'timestamp': '2025-10-01 04:32:16.889581', 'step': 11270, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:16.942647', 'step': 11270, 'epoch': 2} {'type': 'loss', 'content': 0.09893869608640671, 'timestamp': '2025-10-01 04:32:16.944676', 'step': 11271, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:16.997725', 'step': 11271, 'epoch': 2} {'type': 'loss', 'content': 0.1710948646068573, 'timestamp': '2025-10-01 04:32:17.005278', 'step': 11272, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:17.065866', 'step': 11272, 'epoch': 2} {'type': 'loss', 'content': 0.13291434943675995, 'timestamp': '2025-10-01 04:32:17.067932', 'step': 11273, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:17.121554', 'step': 11273, 'epoch': 2} {'type': 'loss', 'content': 0.04942800849676132, 'timestamp': '2025-10-01 04:32:17.123550', 'step': 11274, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:17.176906', 'step': 11274, 'epoch': 2} {'type': 'loss', 'content': 0.12031987309455872, 'timestamp': '2025-10-01 04:32:17.179663', 'step': 11275, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:17.232912', 'step': 11275, 'epoch': 2} {'type': 'loss', 'content': 0.1457379013299942, 'timestamp': '2025-10-01 04:32:17.238607', 'step': 11276, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:17.291760', 'step': 11276, 'epoch': 2} {'type': 'loss', 'content': 0.10227926820516586, 'timestamp': '2025-10-01 04:32:17.293723', 'step': 11277, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:17.347138', 'step': 11277, 'epoch': 2} {'type': 'loss', 'content': 0.054199714213609695, 'timestamp': '2025-10-01 04:32:17.349155', 'step': 11278, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:17.402474', 'step': 11278, 'epoch': 2} {'type': 'loss', 'content': 0.15715034306049347, 'timestamp': '2025-10-01 04:32:17.404572', 'step': 11279, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:17.458158', 'step': 11279, 'epoch': 2} {'type': 'loss', 'content': 0.12328873574733734, 'timestamp': '2025-10-01 04:32:17.463815', 'step': 11280, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:17.517130', 'step': 11280, 'epoch': 2} {'type': 'loss', 'content': 0.0697346031665802, 'timestamp': '2025-10-01 04:32:17.519172', 'step': 11281, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:17.572407', 'step': 11281, 'epoch': 2} {'type': 'loss', 'content': 0.14726313948631287, 'timestamp': '2025-10-01 04:32:17.574733', 'step': 11282, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:17.629187', 'step': 11282, 'epoch': 2} {'type': 'loss', 'content': 0.0628611221909523, 'timestamp': '2025-10-01 04:32:17.631391', 'step': 11283, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:17.684819', 'step': 11283, 'epoch': 2} {'type': 'loss', 'content': 0.1710575670003891, 'timestamp': '2025-10-01 04:32:17.690717', 'step': 11284, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:17.743722', 'step': 11284, 'epoch': 2} {'type': 'loss', 'content': 0.08985838294029236, 'timestamp': '2025-10-01 04:32:17.745967', 'step': 11285, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:32:17.799554', 'step': 11285, 'epoch': 2} {'type': 'loss', 'content': 0.21006959676742554, 'timestamp': '2025-10-01 04:32:17.801667', 'step': 11286, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:17.856155', 'step': 11286, 'epoch': 2} {'type': 'loss', 'content': 0.06723940372467041, 'timestamp': '2025-10-01 04:32:17.858467', 'step': 11287, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:17.911744', 'step': 11287, 'epoch': 2} {'type': 'loss', 'content': 0.18713855743408203, 'timestamp': '2025-10-01 04:32:17.917290', 'step': 11288, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:17.970105', 'step': 11288, 'epoch': 2} {'type': 'loss', 'content': 0.10943553596735, 'timestamp': '2025-10-01 04:32:17.972374', 'step': 11289, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:18.025781', 'step': 11289, 'epoch': 2} {'type': 'loss', 'content': 0.12206383794546127, 'timestamp': '2025-10-01 04:32:18.027669', 'step': 11290, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:18.080896', 'step': 11290, 'epoch': 2} {'type': 'loss', 'content': 0.0890842005610466, 'timestamp': '2025-10-01 04:32:18.083026', 'step': 11291, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:18.137496', 'step': 11291, 'epoch': 2} {'type': 'loss', 'content': 0.15653271973133087, 'timestamp': '2025-10-01 04:32:18.143765', 'step': 11292, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:18.198322', 'step': 11292, 'epoch': 2} {'type': 'loss', 'content': 0.16271506249904633, 'timestamp': '2025-10-01 04:32:18.200520', 'step': 11293, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:18.254942', 'step': 11293, 'epoch': 2} {'type': 'loss', 'content': 0.15910595655441284, 'timestamp': '2025-10-01 04:32:18.257036', 'step': 11294, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:18.310860', 'step': 11294, 'epoch': 2} {'type': 'loss', 'content': 0.16371847689151764, 'timestamp': '2025-10-01 04:32:18.313041', 'step': 11295, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:18.371721', 'step': 11295, 'epoch': 2} {'type': 'loss', 'content': 0.13354027271270752, 'timestamp': '2025-10-01 04:32:18.377884', 'step': 11296, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:18.431043', 'step': 11296, 'epoch': 2} {'type': 'loss', 'content': 0.1456848829984665, 'timestamp': '2025-10-01 04:32:18.432971', 'step': 11297, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:18.486691', 'step': 11297, 'epoch': 2} {'type': 'loss', 'content': 0.07463765144348145, 'timestamp': '2025-10-01 04:32:18.488614', 'step': 11298, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:18.542147', 'step': 11298, 'epoch': 2} {'type': 'loss', 'content': 0.13729232549667358, 'timestamp': '2025-10-01 04:32:18.544290', 'step': 11299, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:18.597487', 'step': 11299, 'epoch': 2} {'type': 'loss', 'content': 0.10378291457891464, 'timestamp': '2025-10-01 04:32:18.603713', 'step': 11300, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:18.657813', 'step': 11300, 'epoch': 2} {'type': 'loss', 'content': 0.15591369569301605, 'timestamp': '2025-10-01 04:32:18.660183', 'step': 11301, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:18.714132', 'step': 11301, 'epoch': 2} {'type': 'loss', 'content': 0.1070292741060257, 'timestamp': '2025-10-01 04:32:18.716048', 'step': 11302, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:18.769096', 'step': 11302, 'epoch': 2} {'type': 'loss', 'content': 0.14179003238677979, 'timestamp': '2025-10-01 04:32:18.773272', 'step': 11303, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:18.830585', 'step': 11303, 'epoch': 2} {'type': 'loss', 'content': 0.15059810876846313, 'timestamp': '2025-10-01 04:32:18.837343', 'step': 11304, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:18.893331', 'step': 11304, 'epoch': 2} {'type': 'loss', 'content': 0.23012174665927887, 'timestamp': '2025-10-01 04:32:18.895232', 'step': 11305, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:18.948392', 'step': 11305, 'epoch': 2} {'type': 'loss', 'content': 0.13878799974918365, 'timestamp': '2025-10-01 04:32:18.950518', 'step': 11306, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:19.007779', 'step': 11306, 'epoch': 2} {'type': 'loss', 'content': 0.21531346440315247, 'timestamp': '2025-10-01 04:32:19.010068', 'step': 11307, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:19.063973', 'step': 11307, 'epoch': 2} {'type': 'loss', 'content': 0.18713419139385223, 'timestamp': '2025-10-01 04:32:19.069881', 'step': 11308, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:19.123139', 'step': 11308, 'epoch': 2} {'type': 'loss', 'content': 0.16197839379310608, 'timestamp': '2025-10-01 04:32:19.125131', 'step': 11309, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:19.185013', 'step': 11309, 'epoch': 2} {'type': 'loss', 'content': 0.11972786486148834, 'timestamp': '2025-10-01 04:32:19.187090', 'step': 11310, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:19.241149', 'step': 11310, 'epoch': 2} {'type': 'loss', 'content': 0.15900474786758423, 'timestamp': '2025-10-01 04:32:19.243347', 'step': 11311, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:19.296821', 'step': 11311, 'epoch': 2} {'type': 'loss', 'content': 0.1072315201163292, 'timestamp': '2025-10-01 04:32:19.302614', 'step': 11312, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:19.355825', 'step': 11312, 'epoch': 2} {'type': 'loss', 'content': 0.16197943687438965, 'timestamp': '2025-10-01 04:32:19.358244', 'step': 11313, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:19.411859', 'step': 11313, 'epoch': 2} {'type': 'loss', 'content': 0.12004267424345016, 'timestamp': '2025-10-01 04:32:19.414036', 'step': 11314, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:19.467013', 'step': 11314, 'epoch': 2} {'type': 'loss', 'content': 0.15415401756763458, 'timestamp': '2025-10-01 04:32:19.469150', 'step': 11315, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:19.522497', 'step': 11315, 'epoch': 2} {'type': 'loss', 'content': 0.17890805006027222, 'timestamp': '2025-10-01 04:32:19.528274', 'step': 11316, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:19.581298', 'step': 11316, 'epoch': 2} {'type': 'loss', 'content': 0.13789840042591095, 'timestamp': '2025-10-01 04:32:19.583878', 'step': 11317, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:19.643477', 'step': 11317, 'epoch': 2} {'type': 'loss', 'content': 0.12585438787937164, 'timestamp': '2025-10-01 04:32:19.646382', 'step': 11318, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:19.700842', 'step': 11318, 'epoch': 2} {'type': 'loss', 'content': 0.08370069414377213, 'timestamp': '2025-10-01 04:32:19.703931', 'step': 11319, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:19.757480', 'step': 11319, 'epoch': 2} {'type': 'loss', 'content': 0.0876217857003212, 'timestamp': '2025-10-01 04:32:19.763156', 'step': 11320, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:19.816630', 'step': 11320, 'epoch': 2} {'type': 'loss', 'content': 0.19713225960731506, 'timestamp': '2025-10-01 04:32:19.818675', 'step': 11321, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:19.873398', 'step': 11321, 'epoch': 2} {'type': 'loss', 'content': 0.11871475726366043, 'timestamp': '2025-10-01 04:32:19.875803', 'step': 11322, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:19.930174', 'step': 11322, 'epoch': 2} {'type': 'loss', 'content': 0.13444621860980988, 'timestamp': '2025-10-01 04:32:19.932459', 'step': 11323, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:32:19.987282', 'step': 11323, 'epoch': 2} {'type': 'loss', 'content': 0.05417969450354576, 'timestamp': '2025-10-01 04:32:19.993508', 'step': 11324, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:20.047831', 'step': 11324, 'epoch': 2} {'type': 'loss', 'content': 0.1728331446647644, 'timestamp': '2025-10-01 04:32:20.051255', 'step': 11325, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:20.106356', 'step': 11325, 'epoch': 2} {'type': 'loss', 'content': 0.21262317895889282, 'timestamp': '2025-10-01 04:32:20.108449', 'step': 11326, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:20.162817', 'step': 11326, 'epoch': 2} {'type': 'loss', 'content': 0.08596726506948471, 'timestamp': '2025-10-01 04:32:20.164778', 'step': 11327, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:20.218956', 'step': 11327, 'epoch': 2} {'type': 'loss', 'content': 0.06675350666046143, 'timestamp': '2025-10-01 04:32:20.224983', 'step': 11328, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:20.279798', 'step': 11328, 'epoch': 2} {'type': 'loss', 'content': 0.09527270495891571, 'timestamp': '2025-10-01 04:32:20.281977', 'step': 11329, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:20.336639', 'step': 11329, 'epoch': 2} {'type': 'loss', 'content': 0.13676711916923523, 'timestamp': '2025-10-01 04:32:20.341257', 'step': 11330, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:20.395408', 'step': 11330, 'epoch': 2} {'type': 'loss', 'content': 0.053971827030181885, 'timestamp': '2025-10-01 04:32:20.397552', 'step': 11331, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:32:20.451374', 'step': 11331, 'epoch': 2} {'type': 'loss', 'content': 0.23709999024868011, 'timestamp': '2025-10-01 04:32:20.457434', 'step': 11332, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:20.510422', 'step': 11332, 'epoch': 2} {'type': 'loss', 'content': 0.22148959338665009, 'timestamp': '2025-10-01 04:32:20.512326', 'step': 11333, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:20.565551', 'step': 11333, 'epoch': 2} {'type': 'loss', 'content': 0.11155744642019272, 'timestamp': '2025-10-01 04:32:20.567406', 'step': 11334, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:20.620501', 'step': 11334, 'epoch': 2} {'type': 'loss', 'content': 0.05949932336807251, 'timestamp': '2025-10-01 04:32:20.623486', 'step': 11335, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:20.677329', 'step': 11335, 'epoch': 2} {'type': 'loss', 'content': 0.09364201128482819, 'timestamp': '2025-10-01 04:32:20.684073', 'step': 11336, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:20.738180', 'step': 11336, 'epoch': 2} {'type': 'loss', 'content': 0.06807442754507065, 'timestamp': '2025-10-01 04:32:20.740379', 'step': 11337, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:20.794641', 'step': 11337, 'epoch': 2} {'type': 'loss', 'content': 0.07118765264749527, 'timestamp': '2025-10-01 04:32:20.797232', 'step': 11338, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:20.860903', 'step': 11338, 'epoch': 2} {'type': 'loss', 'content': 0.15781062841415405, 'timestamp': '2025-10-01 04:32:20.863849', 'step': 11339, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:20.918193', 'step': 11339, 'epoch': 2} {'type': 'loss', 'content': 0.06446409225463867, 'timestamp': '2025-10-01 04:32:20.924147', 'step': 11340, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:20.977546', 'step': 11340, 'epoch': 2} {'type': 'loss', 'content': 0.15533939003944397, 'timestamp': '2025-10-01 04:32:20.979947', 'step': 11341, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:21.033825', 'step': 11341, 'epoch': 2} {'type': 'loss', 'content': 0.13311447203159332, 'timestamp': '2025-10-01 04:32:21.035955', 'step': 11342, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:21.090743', 'step': 11342, 'epoch': 2} {'type': 'loss', 'content': 0.09403412789106369, 'timestamp': '2025-10-01 04:32:21.093460', 'step': 11343, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:21.148176', 'step': 11343, 'epoch': 2} {'type': 'loss', 'content': 0.15398818254470825, 'timestamp': '2025-10-01 04:32:21.154950', 'step': 11344, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:21.209859', 'step': 11344, 'epoch': 2} {'type': 'loss', 'content': 0.15374527871608734, 'timestamp': '2025-10-01 04:32:21.212028', 'step': 11345, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:21.266166', 'step': 11345, 'epoch': 2} {'type': 'loss', 'content': 0.10727841407060623, 'timestamp': '2025-10-01 04:32:21.268078', 'step': 11346, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:21.327704', 'step': 11346, 'epoch': 2} {'type': 'loss', 'content': 0.10287593305110931, 'timestamp': '2025-10-01 04:32:21.329883', 'step': 11347, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:21.383837', 'step': 11347, 'epoch': 2} {'type': 'loss', 'content': 0.1424560695886612, 'timestamp': '2025-10-01 04:32:21.389856', 'step': 11348, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:21.444760', 'step': 11348, 'epoch': 2} {'type': 'loss', 'content': 0.15875503420829773, 'timestamp': '2025-10-01 04:32:21.447063', 'step': 11349, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:21.501011', 'step': 11349, 'epoch': 2} {'type': 'loss', 'content': 0.14423403143882751, 'timestamp': '2025-10-01 04:32:21.503176', 'step': 11350, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:21.557092', 'step': 11350, 'epoch': 2} {'type': 'loss', 'content': 0.1260557919740677, 'timestamp': '2025-10-01 04:32:21.559582', 'step': 11351, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:32:21.621168', 'step': 11351, 'epoch': 2} {'type': 'loss', 'content': 0.17369869351387024, 'timestamp': '2025-10-01 04:32:21.628103', 'step': 11352, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:21.688418', 'step': 11352, 'epoch': 2} {'type': 'loss', 'content': 0.07422124594449997, 'timestamp': '2025-10-01 04:32:21.691072', 'step': 11353, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:21.745138', 'step': 11353, 'epoch': 2} {'type': 'loss', 'content': 0.119855135679245, 'timestamp': '2025-10-01 04:32:21.747570', 'step': 11354, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:21.801834', 'step': 11354, 'epoch': 2} {'type': 'loss', 'content': 0.13603951036930084, 'timestamp': '2025-10-01 04:32:21.806790', 'step': 11355, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:21.863901', 'step': 11355, 'epoch': 2} {'type': 'loss', 'content': 0.0924612432718277, 'timestamp': '2025-10-01 04:32:21.869987', 'step': 11356, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:21.922924', 'step': 11356, 'epoch': 2} {'type': 'loss', 'content': 0.11352520436048508, 'timestamp': '2025-10-01 04:32:21.925265', 'step': 11357, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:21.979025', 'step': 11357, 'epoch': 2} {'type': 'loss', 'content': 0.11558626592159271, 'timestamp': '2025-10-01 04:32:21.981285', 'step': 11358, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:22.035345', 'step': 11358, 'epoch': 2} {'type': 'loss', 'content': 0.11346537619829178, 'timestamp': '2025-10-01 04:32:22.037952', 'step': 11359, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:32:22.093091', 'step': 11359, 'epoch': 2} {'type': 'loss', 'content': 0.13034768402576447, 'timestamp': '2025-10-01 04:32:22.099591', 'step': 11360, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:22.155645', 'step': 11360, 'epoch': 2} {'type': 'loss', 'content': 0.15662968158721924, 'timestamp': '2025-10-01 04:32:22.157861', 'step': 11361, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:22.212920', 'step': 11361, 'epoch': 2} {'type': 'loss', 'content': 0.07601376622915268, 'timestamp': '2025-10-01 04:32:22.214934', 'step': 11362, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:22.269550', 'step': 11362, 'epoch': 2} {'type': 'loss', 'content': 0.10712366551160812, 'timestamp': '2025-10-01 04:32:22.272016', 'step': 11363, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:22.326635', 'step': 11363, 'epoch': 2} {'type': 'loss', 'content': 0.08011632412672043, 'timestamp': '2025-10-01 04:32:22.333148', 'step': 11364, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:22.387591', 'step': 11364, 'epoch': 2} {'type': 'loss', 'content': 0.12345389276742935, 'timestamp': '2025-10-01 04:32:22.389840', 'step': 11365, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:22.446547', 'step': 11365, 'epoch': 2} {'type': 'loss', 'content': 0.10925143957138062, 'timestamp': '2025-10-01 04:32:22.448933', 'step': 11366, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:22.503504', 'step': 11366, 'epoch': 2} {'type': 'loss', 'content': 0.14489279687404633, 'timestamp': '2025-10-01 04:32:22.506144', 'step': 11367, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:22.560726', 'step': 11367, 'epoch': 2} {'type': 'loss', 'content': 0.05842191353440285, 'timestamp': '2025-10-01 04:32:22.566949', 'step': 11368, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:22.621005', 'step': 11368, 'epoch': 2} {'type': 'loss', 'content': 0.18444624543190002, 'timestamp': '2025-10-01 04:32:22.624037', 'step': 11369, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:22.679128', 'step': 11369, 'epoch': 2} {'type': 'loss', 'content': 0.14575883746147156, 'timestamp': '2025-10-01 04:32:22.683834', 'step': 11370, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:22.741507', 'step': 11370, 'epoch': 2} {'type': 'loss', 'content': 0.11647925525903702, 'timestamp': '2025-10-01 04:32:22.743896', 'step': 11371, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:22.807776', 'step': 11371, 'epoch': 2} {'type': 'loss', 'content': 0.10493697971105576, 'timestamp': '2025-10-01 04:32:22.814313', 'step': 11372, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:22.869994', 'step': 11372, 'epoch': 2} {'type': 'loss', 'content': 0.08999771624803543, 'timestamp': '2025-10-01 04:32:22.872114', 'step': 11373, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:22.925681', 'step': 11373, 'epoch': 2} {'type': 'loss', 'content': 0.09163163602352142, 'timestamp': '2025-10-01 04:32:22.927881', 'step': 11374, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:22.988299', 'step': 11374, 'epoch': 2} {'type': 'loss', 'content': 0.1633225381374359, 'timestamp': '2025-10-01 04:32:22.990619', 'step': 11375, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:23.051574', 'step': 11375, 'epoch': 2} {'type': 'loss', 'content': 0.059140246361494064, 'timestamp': '2025-10-01 04:32:23.057621', 'step': 11376, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:23.111379', 'step': 11376, 'epoch': 2} {'type': 'loss', 'content': 0.15308980643749237, 'timestamp': '2025-10-01 04:32:23.113629', 'step': 11377, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:23.171616', 'step': 11377, 'epoch': 2} {'type': 'loss', 'content': 0.2171611338853836, 'timestamp': '2025-10-01 04:32:23.174472', 'step': 11378, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:23.229030', 'step': 11378, 'epoch': 2} {'type': 'loss', 'content': 0.2116997092962265, 'timestamp': '2025-10-01 04:32:23.231525', 'step': 11379, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:23.286995', 'step': 11379, 'epoch': 2} {'type': 'loss', 'content': 0.16800634562969208, 'timestamp': '2025-10-01 04:32:23.294627', 'step': 11380, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:23.358641', 'step': 11380, 'epoch': 2} {'type': 'loss', 'content': 0.09346191585063934, 'timestamp': '2025-10-01 04:32:23.361117', 'step': 11381, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:23.416613', 'step': 11381, 'epoch': 2} {'type': 'loss', 'content': 0.1546214371919632, 'timestamp': '2025-10-01 04:32:23.418990', 'step': 11382, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:32:23.474683', 'step': 11382, 'epoch': 2} {'type': 'loss', 'content': 0.11902259290218353, 'timestamp': '2025-10-01 04:32:23.476806', 'step': 11383, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:23.536792', 'step': 11383, 'epoch': 2} {'type': 'loss', 'content': 0.10278886556625366, 'timestamp': '2025-10-01 04:32:23.543042', 'step': 11384, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:23.596232', 'step': 11384, 'epoch': 2} {'type': 'loss', 'content': 0.06871434301137924, 'timestamp': '2025-10-01 04:32:23.598660', 'step': 11385, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:23.652802', 'step': 11385, 'epoch': 2} {'type': 'loss', 'content': 0.15931512415409088, 'timestamp': '2025-10-01 04:32:23.655491', 'step': 11386, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:23.710575', 'step': 11386, 'epoch': 2} {'type': 'loss', 'content': 0.09233653545379639, 'timestamp': '2025-10-01 04:32:23.712602', 'step': 11387, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:23.767894', 'step': 11387, 'epoch': 2} {'type': 'loss', 'content': 0.11518526077270508, 'timestamp': '2025-10-01 04:32:23.774081', 'step': 11388, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:23.827893', 'step': 11388, 'epoch': 2} {'type': 'loss', 'content': 0.09822940826416016, 'timestamp': '2025-10-01 04:32:23.830099', 'step': 11389, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:23.885655', 'step': 11389, 'epoch': 2} {'type': 'loss', 'content': 0.09194475412368774, 'timestamp': '2025-10-01 04:32:23.887869', 'step': 11390, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:23.942022', 'step': 11390, 'epoch': 2} {'type': 'loss', 'content': 0.16507183015346527, 'timestamp': '2025-10-01 04:32:23.944344', 'step': 11391, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:23.998477', 'step': 11391, 'epoch': 2} {'type': 'loss', 'content': 0.11553533375263214, 'timestamp': '2025-10-01 04:32:24.004447', 'step': 11392, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:24.058354', 'step': 11392, 'epoch': 2} {'type': 'loss', 'content': 0.13069328665733337, 'timestamp': '2025-10-01 04:32:24.060388', 'step': 11393, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:24.113786', 'step': 11393, 'epoch': 2} {'type': 'loss', 'content': 0.10617136210203171, 'timestamp': '2025-10-01 04:32:24.116314', 'step': 11394, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:24.170221', 'step': 11394, 'epoch': 2} {'type': 'loss', 'content': 0.18169917166233063, 'timestamp': '2025-10-01 04:32:24.172407', 'step': 11395, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:24.226647', 'step': 11395, 'epoch': 2} {'type': 'loss', 'content': 0.05355854704976082, 'timestamp': '2025-10-01 04:32:24.232910', 'step': 11396, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:24.286880', 'step': 11396, 'epoch': 2} {'type': 'loss', 'content': 0.14405497908592224, 'timestamp': '2025-10-01 04:32:24.288906', 'step': 11397, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:24.343364', 'step': 11397, 'epoch': 2} {'type': 'loss', 'content': 0.10575860738754272, 'timestamp': '2025-10-01 04:32:24.346500', 'step': 11398, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:24.399802', 'step': 11398, 'epoch': 2} {'type': 'loss', 'content': 0.16596123576164246, 'timestamp': '2025-10-01 04:32:24.401788', 'step': 11399, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:32:24.460827', 'step': 11399, 'epoch': 2} {'type': 'loss', 'content': 0.11830655485391617, 'timestamp': '2025-10-01 04:32:24.466808', 'step': 11400, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:24.521219', 'step': 11400, 'epoch': 2} {'type': 'loss', 'content': 0.15570060908794403, 'timestamp': '2025-10-01 04:32:24.523464', 'step': 11401, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:24.577126', 'step': 11401, 'epoch': 2} {'type': 'loss', 'content': 0.16645079851150513, 'timestamp': '2025-10-01 04:32:24.579135', 'step': 11402, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:24.632624', 'step': 11402, 'epoch': 2} {'type': 'loss', 'content': 0.13768266141414642, 'timestamp': '2025-10-01 04:32:24.635880', 'step': 11403, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:24.689704', 'step': 11403, 'epoch': 2} {'type': 'loss', 'content': 0.1496727615594864, 'timestamp': '2025-10-01 04:32:24.695499', 'step': 11404, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:24.749117', 'step': 11404, 'epoch': 2} {'type': 'loss', 'content': 0.17159925401210785, 'timestamp': '2025-10-01 04:32:24.751141', 'step': 11405, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:24.804677', 'step': 11405, 'epoch': 2} {'type': 'loss', 'content': 0.17491862177848816, 'timestamp': '2025-10-01 04:32:24.806890', 'step': 11406, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:32:24.860878', 'step': 11406, 'epoch': 2} {'type': 'loss', 'content': 0.12123400717973709, 'timestamp': '2025-10-01 04:32:24.863069', 'step': 11407, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:24.917626', 'step': 11407, 'epoch': 2} {'type': 'loss', 'content': 0.15504352748394012, 'timestamp': '2025-10-01 04:32:24.923854', 'step': 11408, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:32:24.977891', 'step': 11408, 'epoch': 2} {'type': 'loss', 'content': 0.1259644478559494, 'timestamp': '2025-10-01 04:32:24.981616', 'step': 11409, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:25.035475', 'step': 11409, 'epoch': 2} {'type': 'loss', 'content': 0.14559553563594818, 'timestamp': '2025-10-01 04:32:25.038155', 'step': 11410, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:25.092467', 'step': 11410, 'epoch': 2} {'type': 'loss', 'content': 0.2326543778181076, 'timestamp': '2025-10-01 04:32:25.095024', 'step': 11411, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:25.148714', 'step': 11411, 'epoch': 2} {'type': 'loss', 'content': 0.11419463902711868, 'timestamp': '2025-10-01 04:32:25.154765', 'step': 11412, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:25.208376', 'step': 11412, 'epoch': 2} {'type': 'loss', 'content': 0.13502787053585052, 'timestamp': '2025-10-01 04:32:25.211197', 'step': 11413, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:25.264826', 'step': 11413, 'epoch': 2} {'type': 'loss', 'content': 0.078654944896698, 'timestamp': '2025-10-01 04:32:25.267592', 'step': 11414, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:25.322185', 'step': 11414, 'epoch': 2} {'type': 'loss', 'content': 0.11195988208055496, 'timestamp': '2025-10-01 04:32:25.324209', 'step': 11415, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:25.378768', 'step': 11415, 'epoch': 2} {'type': 'loss', 'content': 0.07605637609958649, 'timestamp': '2025-10-01 04:32:25.384960', 'step': 11416, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:25.452893', 'step': 11416, 'epoch': 2} {'type': 'loss', 'content': 0.09995699673891068, 'timestamp': '2025-10-01 04:32:25.455169', 'step': 11417, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:25.509191', 'step': 11417, 'epoch': 2} {'type': 'loss', 'content': 0.16712653636932373, 'timestamp': '2025-10-01 04:32:25.511438', 'step': 11418, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:25.566222', 'step': 11418, 'epoch': 2} {'type': 'loss', 'content': 0.12407181411981583, 'timestamp': '2025-10-01 04:32:25.568735', 'step': 11419, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:25.623043', 'step': 11419, 'epoch': 2} {'type': 'loss', 'content': 0.05621713399887085, 'timestamp': '2025-10-01 04:32:25.628899', 'step': 11420, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:25.705308', 'step': 11420, 'epoch': 2} {'type': 'loss', 'content': 0.17447517812252045, 'timestamp': '2025-10-01 04:32:25.707575', 'step': 11421, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:25.761552', 'step': 11421, 'epoch': 2} {'type': 'loss', 'content': 0.09886393696069717, 'timestamp': '2025-10-01 04:32:25.763630', 'step': 11422, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:25.817410', 'step': 11422, 'epoch': 2} {'type': 'loss', 'content': 0.15240758657455444, 'timestamp': '2025-10-01 04:32:25.819434', 'step': 11423, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:25.873536', 'step': 11423, 'epoch': 2} {'type': 'loss', 'content': 0.09395850449800491, 'timestamp': '2025-10-01 04:32:25.879473', 'step': 11424, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:25.934970', 'step': 11424, 'epoch': 2} {'type': 'loss', 'content': 0.10000578314065933, 'timestamp': '2025-10-01 04:32:25.937492', 'step': 11425, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:25.991841', 'step': 11425, 'epoch': 2} {'type': 'loss', 'content': 0.18532684445381165, 'timestamp': '2025-10-01 04:32:25.994301', 'step': 11426, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:26.049645', 'step': 11426, 'epoch': 2} {'type': 'loss', 'content': 0.06383882462978363, 'timestamp': '2025-10-01 04:32:26.051911', 'step': 11427, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:26.105497', 'step': 11427, 'epoch': 2} {'type': 'loss', 'content': 0.09277524799108505, 'timestamp': '2025-10-01 04:32:26.112969', 'step': 11428, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:26.166307', 'step': 11428, 'epoch': 2} {'type': 'loss', 'content': 0.22068962454795837, 'timestamp': '2025-10-01 04:32:26.168757', 'step': 11429, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:26.224711', 'step': 11429, 'epoch': 2} {'type': 'loss', 'content': 0.16713231801986694, 'timestamp': '2025-10-01 04:32:26.227485', 'step': 11430, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:32:26.289130', 'step': 11430, 'epoch': 2} {'type': 'loss', 'content': 0.11573914438486099, 'timestamp': '2025-10-01 04:32:26.291741', 'step': 11431, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:26.348863', 'step': 11431, 'epoch': 2} {'type': 'loss', 'content': 0.12522736191749573, 'timestamp': '2025-10-01 04:32:26.354630', 'step': 11432, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:26.407973', 'step': 11432, 'epoch': 2} {'type': 'loss', 'content': 0.16175203025341034, 'timestamp': '2025-10-01 04:32:26.410141', 'step': 11433, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:26.464356', 'step': 11433, 'epoch': 2} {'type': 'loss', 'content': 0.15115278959274292, 'timestamp': '2025-10-01 04:32:26.466441', 'step': 11434, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:26.520045', 'step': 11434, 'epoch': 2} {'type': 'loss', 'content': 0.11504296958446503, 'timestamp': '2025-10-01 04:32:26.522232', 'step': 11435, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:26.575701', 'step': 11435, 'epoch': 2} {'type': 'loss', 'content': 0.08674013614654541, 'timestamp': '2025-10-01 04:32:26.581554', 'step': 11436, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:26.634724', 'step': 11436, 'epoch': 2} {'type': 'loss', 'content': 0.1510261446237564, 'timestamp': '2025-10-01 04:32:26.637141', 'step': 11437, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:26.690570', 'step': 11437, 'epoch': 2} {'type': 'loss', 'content': 0.09267312288284302, 'timestamp': '2025-10-01 04:32:26.692926', 'step': 11438, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:26.746278', 'step': 11438, 'epoch': 2} {'type': 'loss', 'content': 0.14871777594089508, 'timestamp': '2025-10-01 04:32:26.755798', 'step': 11439, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:26.809669', 'step': 11439, 'epoch': 2} {'type': 'loss', 'content': 0.13210807740688324, 'timestamp': '2025-10-01 04:32:26.815755', 'step': 11440, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:26.868712', 'step': 11440, 'epoch': 2} {'type': 'loss', 'content': 0.08073602616786957, 'timestamp': '2025-10-01 04:32:26.870792', 'step': 11441, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:32:26.924261', 'step': 11441, 'epoch': 2} {'type': 'loss', 'content': 0.11818096041679382, 'timestamp': '2025-10-01 04:32:26.926456', 'step': 11442, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:26.980165', 'step': 11442, 'epoch': 2} {'type': 'loss', 'content': 0.09496480226516724, 'timestamp': '2025-10-01 04:32:26.982052', 'step': 11443, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:27.035182', 'step': 11443, 'epoch': 2} {'type': 'loss', 'content': 0.07062312215566635, 'timestamp': '2025-10-01 04:32:27.040910', 'step': 11444, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:27.093905', 'step': 11444, 'epoch': 2} {'type': 'loss', 'content': 0.13938912749290466, 'timestamp': '2025-10-01 04:32:27.095792', 'step': 11445, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:27.149467', 'step': 11445, 'epoch': 2} {'type': 'loss', 'content': 0.05725245550274849, 'timestamp': '2025-10-01 04:32:27.153097', 'step': 11446, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:27.207645', 'step': 11446, 'epoch': 2} {'type': 'loss', 'content': 0.08628036826848984, 'timestamp': '2025-10-01 04:32:27.209855', 'step': 11447, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:27.265305', 'step': 11447, 'epoch': 2} {'type': 'loss', 'content': 0.0705166757106781, 'timestamp': '2025-10-01 04:32:27.271527', 'step': 11448, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:27.324931', 'step': 11448, 'epoch': 2} {'type': 'loss', 'content': 0.1334901750087738, 'timestamp': '2025-10-01 04:32:27.327033', 'step': 11449, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:32:27.381009', 'step': 11449, 'epoch': 2} {'type': 'loss', 'content': 0.10077421367168427, 'timestamp': '2025-10-01 04:32:27.382943', 'step': 11450, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:27.436647', 'step': 11450, 'epoch': 2} {'type': 'loss', 'content': 0.12046366930007935, 'timestamp': '2025-10-01 04:32:27.438888', 'step': 11451, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:27.492017', 'step': 11451, 'epoch': 2} {'type': 'loss', 'content': 0.06678301095962524, 'timestamp': '2025-10-01 04:32:27.497881', 'step': 11452, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:27.551113', 'step': 11452, 'epoch': 2} {'type': 'loss', 'content': 0.07143329083919525, 'timestamp': '2025-10-01 04:32:27.553372', 'step': 11453, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:27.606872', 'step': 11453, 'epoch': 2} {'type': 'loss', 'content': 0.08765321969985962, 'timestamp': '2025-10-01 04:32:27.608898', 'step': 11454, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:27.662416', 'step': 11454, 'epoch': 2} {'type': 'loss', 'content': 0.08783597499132156, 'timestamp': '2025-10-01 04:32:27.664544', 'step': 11455, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:27.718057', 'step': 11455, 'epoch': 2} {'type': 'loss', 'content': 0.1662730574607849, 'timestamp': '2025-10-01 04:32:27.723801', 'step': 11456, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:27.776840', 'step': 11456, 'epoch': 2} {'type': 'loss', 'content': 0.08211036026477814, 'timestamp': '2025-10-01 04:32:27.778878', 'step': 11457, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:27.831926', 'step': 11457, 'epoch': 2} {'type': 'loss', 'content': 0.19098201394081116, 'timestamp': '2025-10-01 04:32:27.834107', 'step': 11458, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:27.888311', 'step': 11458, 'epoch': 2} {'type': 'loss', 'content': 0.13142932951450348, 'timestamp': '2025-10-01 04:32:27.902271', 'step': 11459, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:32:27.956718', 'step': 11459, 'epoch': 2} {'type': 'loss', 'content': 0.16950899362564087, 'timestamp': '2025-10-01 04:32:27.963001', 'step': 11460, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:28.015812', 'step': 11460, 'epoch': 2} {'type': 'loss', 'content': 0.11383779346942902, 'timestamp': '2025-10-01 04:32:28.018043', 'step': 11461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:28.071487', 'step': 11461, 'epoch': 2} {'type': 'loss', 'content': 0.08703522384166718, 'timestamp': '2025-10-01 04:32:28.073882', 'step': 11462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:28.129620', 'step': 11462, 'epoch': 2} {'type': 'loss', 'content': 0.11740250140428543, 'timestamp': '2025-10-01 04:32:28.131922', 'step': 11463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:28.185127', 'step': 11463, 'epoch': 2} {'type': 'loss', 'content': 0.10948841273784637, 'timestamp': '2025-10-01 04:32:28.190594', 'step': 11464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:28.244471', 'step': 11464, 'epoch': 2} {'type': 'loss', 'content': 0.175924614071846, 'timestamp': '2025-10-01 04:32:28.246546', 'step': 11465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:28.329436', 'step': 11465, 'epoch': 2} {'type': 'loss', 'content': 0.17149601876735687, 'timestamp': '2025-10-01 04:32:28.331613', 'step': 11466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:28.385282', 'step': 11466, 'epoch': 2} {'type': 'loss', 'content': 0.14912796020507812, 'timestamp': '2025-10-01 04:32:28.387436', 'step': 11467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:28.441433', 'step': 11467, 'epoch': 2} {'type': 'loss', 'content': 0.11061292141675949, 'timestamp': '2025-10-01 04:32:28.447452', 'step': 11468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:28.513477', 'step': 11468, 'epoch': 2} {'type': 'loss', 'content': 0.18976326286792755, 'timestamp': '2025-10-01 04:32:28.515493', 'step': 11469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:28.569323', 'step': 11469, 'epoch': 2} {'type': 'loss', 'content': 0.0876188725233078, 'timestamp': '2025-10-01 04:32:28.572803', 'step': 11470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:32:28.626831', 'step': 11470, 'epoch': 2} {'type': 'loss', 'content': 0.10557352751493454, 'timestamp': '2025-10-01 04:32:28.629039', 'step': 11471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:28.682202', 'step': 11471, 'epoch': 2} {'type': 'loss', 'content': 0.07807283103466034, 'timestamp': '2025-10-01 04:32:28.687795', 'step': 11472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:28.741485', 'step': 11472, 'epoch': 2} {'type': 'loss', 'content': 0.2581448256969452, 'timestamp': '2025-10-01 04:32:28.743475', 'step': 11473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:28.796390', 'step': 11473, 'epoch': 2} {'type': 'loss', 'content': 0.10046660900115967, 'timestamp': '2025-10-01 04:32:28.798528', 'step': 11474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:28.852824', 'step': 11474, 'epoch': 2} {'type': 'loss', 'content': 0.12257509678602219, 'timestamp': '2025-10-01 04:32:28.854925', 'step': 11475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:28.908669', 'step': 11475, 'epoch': 2} {'type': 'loss', 'content': 0.16213850677013397, 'timestamp': '2025-10-01 04:32:28.914847', 'step': 11476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:28.968576', 'step': 11476, 'epoch': 2} {'type': 'loss', 'content': 0.05959957093000412, 'timestamp': '2025-10-01 04:32:28.970589', 'step': 11477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:29.024585', 'step': 11477, 'epoch': 2} {'type': 'loss', 'content': 0.16389994323253632, 'timestamp': '2025-10-01 04:32:29.026670', 'step': 11478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:29.081214', 'step': 11478, 'epoch': 2} {'type': 'loss', 'content': 0.12304749339818954, 'timestamp': '2025-10-01 04:32:29.083180', 'step': 11479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:29.136785', 'step': 11479, 'epoch': 2} {'type': 'loss', 'content': 0.06328420341014862, 'timestamp': '2025-10-01 04:32:29.142642', 'step': 11480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:29.195611', 'step': 11480, 'epoch': 2} {'type': 'loss', 'content': 0.12874558568000793, 'timestamp': '2025-10-01 04:32:29.197598', 'step': 11481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:29.250730', 'step': 11481, 'epoch': 2} {'type': 'loss', 'content': 0.2613348364830017, 'timestamp': '2025-10-01 04:32:29.252776', 'step': 11482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:29.306016', 'step': 11482, 'epoch': 2} {'type': 'loss', 'content': 0.11023854464292526, 'timestamp': '2025-10-01 04:32:29.308216', 'step': 11483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:29.361037', 'step': 11483, 'epoch': 2} {'type': 'loss', 'content': 0.15456803143024445, 'timestamp': '2025-10-01 04:32:29.366966', 'step': 11484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:29.420363', 'step': 11484, 'epoch': 2} {'type': 'loss', 'content': 0.06923335790634155, 'timestamp': '2025-10-01 04:32:29.422345', 'step': 11485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:29.475961', 'step': 11485, 'epoch': 2} {'type': 'loss', 'content': 0.13167504966259003, 'timestamp': '2025-10-01 04:32:29.478183', 'step': 11486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:29.532730', 'step': 11486, 'epoch': 2} {'type': 'loss', 'content': 0.08495115488767624, 'timestamp': '2025-10-01 04:32:29.534890', 'step': 11487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:29.590186', 'step': 11487, 'epoch': 2} {'type': 'loss', 'content': 0.09202888607978821, 'timestamp': '2025-10-01 04:32:29.596245', 'step': 11488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:29.649856', 'step': 11488, 'epoch': 2} {'type': 'loss', 'content': 0.1627146452665329, 'timestamp': '2025-10-01 04:32:29.651918', 'step': 11489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:29.705189', 'step': 11489, 'epoch': 2} {'type': 'loss', 'content': 0.13421259820461273, 'timestamp': '2025-10-01 04:32:29.707170', 'step': 11490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:29.760837', 'step': 11490, 'epoch': 2} {'type': 'loss', 'content': 0.11602377891540527, 'timestamp': '2025-10-01 04:32:29.762901', 'step': 11491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:29.816023', 'step': 11491, 'epoch': 2} {'type': 'loss', 'content': 0.12596562504768372, 'timestamp': '2025-10-01 04:32:29.821949', 'step': 11492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:29.874355', 'step': 11492, 'epoch': 2} {'type': 'loss', 'content': 0.14769069850444794, 'timestamp': '2025-10-01 04:32:29.876501', 'step': 11493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:29.929817', 'step': 11493, 'epoch': 2} {'type': 'loss', 'content': 0.11815542727708817, 'timestamp': '2025-10-01 04:32:29.931458', 'step': 11494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:29.984703', 'step': 11494, 'epoch': 2} {'type': 'loss', 'content': 0.12714606523513794, 'timestamp': '2025-10-01 04:32:29.986618', 'step': 11495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:32:30.039593', 'step': 11495, 'epoch': 2} {'type': 'loss', 'content': 0.1594201624393463, 'timestamp': '2025-10-01 04:32:30.045539', 'step': 11496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:30.098414', 'step': 11496, 'epoch': 2} {'type': 'loss', 'content': 0.14419978857040405, 'timestamp': '2025-10-01 04:32:30.100600', 'step': 11497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:30.154127', 'step': 11497, 'epoch': 2} {'type': 'loss', 'content': 0.08454800397157669, 'timestamp': '2025-10-01 04:32:30.156221', 'step': 11498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:30.209766', 'step': 11498, 'epoch': 2} {'type': 'loss', 'content': 0.10724753886461258, 'timestamp': '2025-10-01 04:32:30.213052', 'step': 11499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:30.266202', 'step': 11499, 'epoch': 2} {'type': 'loss', 'content': 0.06667540222406387, 'timestamp': '2025-10-01 04:32:30.272169', 'step': 11500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 11500', 'timestamp': '2025-10-01 04:32:30.654053', 'step': 11500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:30.710858', 'step': 11500, 'epoch': 2} {'type': 'loss', 'content': 0.1303238570690155, 'timestamp': '2025-10-01 04:32:30.712921', 'step': 11501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:30.772915', 'step': 11501, 'epoch': 2} {'type': 'loss', 'content': 0.08503234386444092, 'timestamp': '2025-10-01 04:32:30.775124', 'step': 11502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:30.830385', 'step': 11502, 'epoch': 2} {'type': 'loss', 'content': 0.2044975906610489, 'timestamp': '2025-10-01 04:32:30.832598', 'step': 11503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:30.886678', 'step': 11503, 'epoch': 2} {'type': 'loss', 'content': 0.1036173552274704, 'timestamp': '2025-10-01 04:32:30.901090', 'step': 11504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:30.956525', 'step': 11504, 'epoch': 2} {'type': 'loss', 'content': 0.10577680170536041, 'timestamp': '2025-10-01 04:32:30.958877', 'step': 11505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:31.013361', 'step': 11505, 'epoch': 2} {'type': 'loss', 'content': 0.05283960700035095, 'timestamp': '2025-10-01 04:32:31.015662', 'step': 11506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:31.069869', 'step': 11506, 'epoch': 2} {'type': 'loss', 'content': 0.09890501201152802, 'timestamp': '2025-10-01 04:32:31.075204', 'step': 11507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:31.135316', 'step': 11507, 'epoch': 2} {'type': 'loss', 'content': 0.17192663252353668, 'timestamp': '2025-10-01 04:32:31.141747', 'step': 11508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:31.196818', 'step': 11508, 'epoch': 2} {'type': 'loss', 'content': 0.08536278456449509, 'timestamp': '2025-10-01 04:32:31.199235', 'step': 11509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:31.253576', 'step': 11509, 'epoch': 2} {'type': 'loss', 'content': 0.18290337920188904, 'timestamp': '2025-10-01 04:32:31.255997', 'step': 11510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:31.310800', 'step': 11510, 'epoch': 2} {'type': 'loss', 'content': 0.08262268453836441, 'timestamp': '2025-10-01 04:32:31.317092', 'step': 11511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:31.375731', 'step': 11511, 'epoch': 2} {'type': 'loss', 'content': 0.14055940508842468, 'timestamp': '2025-10-01 04:32:31.382357', 'step': 11512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:31.441834', 'step': 11512, 'epoch': 2} {'type': 'loss', 'content': 0.060233183205127716, 'timestamp': '2025-10-01 04:32:31.444638', 'step': 11513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:31.500262', 'step': 11513, 'epoch': 2} {'type': 'loss', 'content': 0.14510785043239594, 'timestamp': '2025-10-01 04:32:31.503321', 'step': 11514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:32:31.565123', 'step': 11514, 'epoch': 2} {'type': 'loss', 'content': 0.10250888019800186, 'timestamp': '2025-10-01 04:32:31.567237', 'step': 11515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:31.620996', 'step': 11515, 'epoch': 2} {'type': 'loss', 'content': 0.11053392291069031, 'timestamp': '2025-10-01 04:32:31.627734', 'step': 11516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:31.681856', 'step': 11516, 'epoch': 2} {'type': 'loss', 'content': 0.17859704792499542, 'timestamp': '2025-10-01 04:32:31.684102', 'step': 11517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:31.739421', 'step': 11517, 'epoch': 2} {'type': 'loss', 'content': 0.1447327882051468, 'timestamp': '2025-10-01 04:32:31.741960', 'step': 11518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:31.796795', 'step': 11518, 'epoch': 2} {'type': 'loss', 'content': 0.15124550461769104, 'timestamp': '2025-10-01 04:32:31.799313', 'step': 11519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:31.854202', 'step': 11519, 'epoch': 2} {'type': 'loss', 'content': 0.1558394581079483, 'timestamp': '2025-10-01 04:32:31.860202', 'step': 11520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:31.914624', 'step': 11520, 'epoch': 2} {'type': 'loss', 'content': 0.19184818863868713, 'timestamp': '2025-10-01 04:32:31.917522', 'step': 11521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:31.972078', 'step': 11521, 'epoch': 2} {'type': 'loss', 'content': 0.1511244922876358, 'timestamp': '2025-10-01 04:32:31.974599', 'step': 11522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:32.029059', 'step': 11522, 'epoch': 2} {'type': 'loss', 'content': 0.165945366024971, 'timestamp': '2025-10-01 04:32:32.031545', 'step': 11523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:32.085508', 'step': 11523, 'epoch': 2} {'type': 'loss', 'content': 0.1739385724067688, 'timestamp': '2025-10-01 04:32:32.091689', 'step': 11524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:32.145487', 'step': 11524, 'epoch': 2} {'type': 'loss', 'content': 0.09204699099063873, 'timestamp': '2025-10-01 04:32:32.148215', 'step': 11525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:32.201883', 'step': 11525, 'epoch': 2} {'type': 'loss', 'content': 0.1458277553319931, 'timestamp': '2025-10-01 04:32:32.204363', 'step': 11526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:32.258953', 'step': 11526, 'epoch': 2} {'type': 'loss', 'content': 0.10946562886238098, 'timestamp': '2025-10-01 04:32:32.261303', 'step': 11527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:32.315724', 'step': 11527, 'epoch': 2} {'type': 'loss', 'content': 0.11419215798377991, 'timestamp': '2025-10-01 04:32:32.322053', 'step': 11528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:32.376296', 'step': 11528, 'epoch': 2} {'type': 'loss', 'content': 0.15934105217456818, 'timestamp': '2025-10-01 04:32:32.378586', 'step': 11529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:32:32.432798', 'step': 11529, 'epoch': 2} {'type': 'loss', 'content': 0.08346977829933167, 'timestamp': '2025-10-01 04:32:32.435187', 'step': 11530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:32.489745', 'step': 11530, 'epoch': 2} {'type': 'loss', 'content': 0.09578868746757507, 'timestamp': '2025-10-01 04:32:32.497232', 'step': 11531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:32.552307', 'step': 11531, 'epoch': 2} {'type': 'loss', 'content': 0.10821205377578735, 'timestamp': '2025-10-01 04:32:32.558118', 'step': 11532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:32.612481', 'step': 11532, 'epoch': 2} {'type': 'loss', 'content': 0.139895960688591, 'timestamp': '2025-10-01 04:32:32.614602', 'step': 11533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:32.668823', 'step': 11533, 'epoch': 2} {'type': 'loss', 'content': 0.055702194571495056, 'timestamp': '2025-10-01 04:32:32.670869', 'step': 11534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:32:32.725214', 'step': 11534, 'epoch': 2} {'type': 'loss', 'content': 0.13668984174728394, 'timestamp': '2025-10-01 04:32:32.727434', 'step': 11535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:32.782854', 'step': 11535, 'epoch': 2} {'type': 'loss', 'content': 0.1368645429611206, 'timestamp': '2025-10-01 04:32:32.789167', 'step': 11536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:32.841750', 'step': 11536, 'epoch': 2} {'type': 'loss', 'content': 0.16043242812156677, 'timestamp': '2025-10-01 04:32:32.843769', 'step': 11537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:32.896864', 'step': 11537, 'epoch': 2} {'type': 'loss', 'content': 0.10441859811544418, 'timestamp': '2025-10-01 04:32:32.898925', 'step': 11538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:32.952592', 'step': 11538, 'epoch': 2} {'type': 'loss', 'content': 0.06650321930646896, 'timestamp': '2025-10-01 04:32:32.954813', 'step': 11539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:33.008651', 'step': 11539, 'epoch': 2} {'type': 'loss', 'content': 0.2169669270515442, 'timestamp': '2025-10-01 04:32:33.014632', 'step': 11540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:33.067565', 'step': 11540, 'epoch': 2} {'type': 'loss', 'content': 0.09958299994468689, 'timestamp': '2025-10-01 04:32:33.069632', 'step': 11541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:33.124336', 'step': 11541, 'epoch': 2} {'type': 'loss', 'content': 0.15289731323719025, 'timestamp': '2025-10-01 04:32:33.126546', 'step': 11542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:33.180722', 'step': 11542, 'epoch': 2} {'type': 'loss', 'content': 0.07904355227947235, 'timestamp': '2025-10-01 04:32:33.182817', 'step': 11543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:33.237198', 'step': 11543, 'epoch': 2} {'type': 'loss', 'content': 0.11613814532756805, 'timestamp': '2025-10-01 04:32:33.243017', 'step': 11544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:33.296502', 'step': 11544, 'epoch': 2} {'type': 'loss', 'content': 0.1468752920627594, 'timestamp': '2025-10-01 04:32:33.301496', 'step': 11545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:33.367876', 'step': 11545, 'epoch': 2} {'type': 'loss', 'content': 0.11960139870643616, 'timestamp': '2025-10-01 04:32:33.371080', 'step': 11546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:33.426756', 'step': 11546, 'epoch': 2} {'type': 'loss', 'content': 0.15105347335338593, 'timestamp': '2025-10-01 04:32:33.433913', 'step': 11547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:33.496674', 'step': 11547, 'epoch': 2} {'type': 'loss', 'content': 0.1404619663953781, 'timestamp': '2025-10-01 04:32:33.502891', 'step': 11548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:33.556281', 'step': 11548, 'epoch': 2} {'type': 'loss', 'content': 0.09930752962827682, 'timestamp': '2025-10-01 04:32:33.558562', 'step': 11549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:33.614133', 'step': 11549, 'epoch': 2} {'type': 'loss', 'content': 0.05039002373814583, 'timestamp': '2025-10-01 04:32:33.616451', 'step': 11550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:33.671406', 'step': 11550, 'epoch': 2} {'type': 'loss', 'content': 0.06210464611649513, 'timestamp': '2025-10-01 04:32:33.673507', 'step': 11551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:32:33.727486', 'step': 11551, 'epoch': 2} {'type': 'loss', 'content': 0.08629664778709412, 'timestamp': '2025-10-01 04:32:33.733955', 'step': 11552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:33.787410', 'step': 11552, 'epoch': 2} {'type': 'loss', 'content': 0.09141089022159576, 'timestamp': '2025-10-01 04:32:33.789612', 'step': 11553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:33.844251', 'step': 11553, 'epoch': 2} {'type': 'loss', 'content': 0.07775508612394333, 'timestamp': '2025-10-01 04:32:33.846514', 'step': 11554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:33.901038', 'step': 11554, 'epoch': 2} {'type': 'loss', 'content': 0.11628230661153793, 'timestamp': '2025-10-01 04:32:33.902964', 'step': 11555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:33.956960', 'step': 11555, 'epoch': 2} {'type': 'loss', 'content': 0.14201746881008148, 'timestamp': '2025-10-01 04:32:33.963129', 'step': 11556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:34.016618', 'step': 11556, 'epoch': 2} {'type': 'loss', 'content': 0.08027450740337372, 'timestamp': '2025-10-01 04:32:34.018777', 'step': 11557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:32:34.088945', 'step': 11557, 'epoch': 2} {'type': 'loss', 'content': 0.1133805364370346, 'timestamp': '2025-10-01 04:32:34.093592', 'step': 11558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:34.159264', 'step': 11558, 'epoch': 2} {'type': 'loss', 'content': 0.17058464884757996, 'timestamp': '2025-10-01 04:32:34.161285', 'step': 11559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:34.216398', 'step': 11559, 'epoch': 2} {'type': 'loss', 'content': 0.09387242048978806, 'timestamp': '2025-10-01 04:32:34.222787', 'step': 11560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:34.276215', 'step': 11560, 'epoch': 2} {'type': 'loss', 'content': 0.12076429277658463, 'timestamp': '2025-10-01 04:32:34.278293', 'step': 11561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:34.332169', 'step': 11561, 'epoch': 2} {'type': 'loss', 'content': 0.12489736825227737, 'timestamp': '2025-10-01 04:32:34.334316', 'step': 11562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:34.387664', 'step': 11562, 'epoch': 2} {'type': 'loss', 'content': 0.16009607911109924, 'timestamp': '2025-10-01 04:32:34.390902', 'step': 11563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:34.446025', 'step': 11563, 'epoch': 2} {'type': 'loss', 'content': 0.09141934663057327, 'timestamp': '2025-10-01 04:32:34.451820', 'step': 11564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:34.504992', 'step': 11564, 'epoch': 2} {'type': 'loss', 'content': 0.06391887366771698, 'timestamp': '2025-10-01 04:32:34.507080', 'step': 11565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:34.560470', 'step': 11565, 'epoch': 2} {'type': 'loss', 'content': 0.14710789918899536, 'timestamp': '2025-10-01 04:32:34.562628', 'step': 11566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:34.616115', 'step': 11566, 'epoch': 2} {'type': 'loss', 'content': 0.15743231773376465, 'timestamp': '2025-10-01 04:32:34.618440', 'step': 11567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:34.672117', 'step': 11567, 'epoch': 2} {'type': 'loss', 'content': 0.13678006827831268, 'timestamp': '2025-10-01 04:32:34.678150', 'step': 11568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:34.731066', 'step': 11568, 'epoch': 2} {'type': 'loss', 'content': 0.12630923092365265, 'timestamp': '2025-10-01 04:32:34.732778', 'step': 11569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:34.785799', 'step': 11569, 'epoch': 2} {'type': 'loss', 'content': 0.09482767432928085, 'timestamp': '2025-10-01 04:32:34.787976', 'step': 11570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:34.842576', 'step': 11570, 'epoch': 2} {'type': 'loss', 'content': 0.2268143594264984, 'timestamp': '2025-10-01 04:32:34.845521', 'step': 11571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:34.898726', 'step': 11571, 'epoch': 2} {'type': 'loss', 'content': 0.07871562987565994, 'timestamp': '2025-10-01 04:32:34.904354', 'step': 11572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:34.957594', 'step': 11572, 'epoch': 2} {'type': 'loss', 'content': 0.11272988468408585, 'timestamp': '2025-10-01 04:32:34.960589', 'step': 11573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:32:35.014324', 'step': 11573, 'epoch': 2} {'type': 'loss', 'content': 0.09291771799325943, 'timestamp': '2025-10-01 04:32:35.016102', 'step': 11574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:35.071570', 'step': 11574, 'epoch': 2} {'type': 'loss', 'content': 0.11993809044361115, 'timestamp': '2025-10-01 04:32:35.073586', 'step': 11575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:32:35.127710', 'step': 11575, 'epoch': 2} {'type': 'loss', 'content': 0.032603807747364044, 'timestamp': '2025-10-01 04:32:35.133637', 'step': 11576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:35.186979', 'step': 11576, 'epoch': 2} {'type': 'loss', 'content': 0.15568654239177704, 'timestamp': '2025-10-01 04:32:35.188920', 'step': 11577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:35.242129', 'step': 11577, 'epoch': 2} {'type': 'loss', 'content': 0.0635005459189415, 'timestamp': '2025-10-01 04:32:35.244257', 'step': 11578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:35.298184', 'step': 11578, 'epoch': 2} {'type': 'loss', 'content': 0.12158512324094772, 'timestamp': '2025-10-01 04:32:35.300149', 'step': 11579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:35.353900', 'step': 11579, 'epoch': 2} {'type': 'loss', 'content': 0.07166250050067902, 'timestamp': '2025-10-01 04:32:35.359417', 'step': 11580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:35.412694', 'step': 11580, 'epoch': 2} {'type': 'loss', 'content': 0.15891405940055847, 'timestamp': '2025-10-01 04:32:35.414429', 'step': 11581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:35.467798', 'step': 11581, 'epoch': 2} {'type': 'loss', 'content': 0.10333124548196793, 'timestamp': '2025-10-01 04:32:35.469643', 'step': 11582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:35.523736', 'step': 11582, 'epoch': 2} {'type': 'loss', 'content': 0.1306469589471817, 'timestamp': '2025-10-01 04:32:35.525836', 'step': 11583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:35.579186', 'step': 11583, 'epoch': 2} {'type': 'loss', 'content': 0.10754998028278351, 'timestamp': '2025-10-01 04:32:35.585025', 'step': 11584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:35.637869', 'step': 11584, 'epoch': 2} {'type': 'loss', 'content': 0.15681561827659607, 'timestamp': '2025-10-01 04:32:35.639942', 'step': 11585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:35.694090', 'step': 11585, 'epoch': 2} {'type': 'loss', 'content': 0.11891859769821167, 'timestamp': '2025-10-01 04:32:35.696312', 'step': 11586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:35.752068', 'step': 11586, 'epoch': 2} {'type': 'loss', 'content': 0.1013202890753746, 'timestamp': '2025-10-01 04:32:35.754163', 'step': 11587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:32:35.809246', 'step': 11587, 'epoch': 2} {'type': 'loss', 'content': 0.07807411998510361, 'timestamp': '2025-10-01 04:32:35.815143', 'step': 11588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:35.869472', 'step': 11588, 'epoch': 2} {'type': 'loss', 'content': 0.05383528396487236, 'timestamp': '2025-10-01 04:32:35.871416', 'step': 11589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:35.926346', 'step': 11589, 'epoch': 2} {'type': 'loss', 'content': 0.15708334743976593, 'timestamp': '2025-10-01 04:32:35.928558', 'step': 11590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:35.984007', 'step': 11590, 'epoch': 2} {'type': 'loss', 'content': 0.17378981411457062, 'timestamp': '2025-10-01 04:32:35.986059', 'step': 11591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:36.040101', 'step': 11591, 'epoch': 2} {'type': 'loss', 'content': 0.12429866939783096, 'timestamp': '2025-10-01 04:32:36.046682', 'step': 11592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:36.100436', 'step': 11592, 'epoch': 2} {'type': 'loss', 'content': 0.1320420503616333, 'timestamp': '2025-10-01 04:32:36.102995', 'step': 11593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:36.157506', 'step': 11593, 'epoch': 2} {'type': 'loss', 'content': 0.14735902845859528, 'timestamp': '2025-10-01 04:32:36.159624', 'step': 11594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:36.217115', 'step': 11594, 'epoch': 2} {'type': 'loss', 'content': 0.11178320646286011, 'timestamp': '2025-10-01 04:32:36.219117', 'step': 11595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:36.273246', 'step': 11595, 'epoch': 2} {'type': 'loss', 'content': 0.18385253846645355, 'timestamp': '2025-10-01 04:32:36.279453', 'step': 11596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:36.333187', 'step': 11596, 'epoch': 2} {'type': 'loss', 'content': 0.08971332013607025, 'timestamp': '2025-10-01 04:32:36.335538', 'step': 11597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:36.389558', 'step': 11597, 'epoch': 2} {'type': 'loss', 'content': 0.14865346252918243, 'timestamp': '2025-10-01 04:32:36.391882', 'step': 11598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:36.445851', 'step': 11598, 'epoch': 2} {'type': 'loss', 'content': 0.15616466104984283, 'timestamp': '2025-10-01 04:32:36.447785', 'step': 11599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:36.501323', 'step': 11599, 'epoch': 2} {'type': 'loss', 'content': 0.07691950350999832, 'timestamp': '2025-10-01 04:32:36.507269', 'step': 11600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:36.560366', 'step': 11600, 'epoch': 2} {'type': 'loss', 'content': 0.064997099339962, 'timestamp': '2025-10-01 04:32:36.562222', 'step': 11601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:36.616259', 'step': 11601, 'epoch': 2} {'type': 'loss', 'content': 0.042226746678352356, 'timestamp': '2025-10-01 04:32:36.618038', 'step': 11602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:36.671529', 'step': 11602, 'epoch': 2} {'type': 'loss', 'content': 0.10114166885614395, 'timestamp': '2025-10-01 04:32:36.673381', 'step': 11603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:36.726874', 'step': 11603, 'epoch': 2} {'type': 'loss', 'content': 0.12686753273010254, 'timestamp': '2025-10-01 04:32:36.732872', 'step': 11604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:36.787053', 'step': 11604, 'epoch': 2} {'type': 'loss', 'content': 0.1613766849040985, 'timestamp': '2025-10-01 04:32:36.789302', 'step': 11605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:36.844269', 'step': 11605, 'epoch': 2} {'type': 'loss', 'content': 0.13670478761196136, 'timestamp': '2025-10-01 04:32:36.846395', 'step': 11606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:32:36.901042', 'step': 11606, 'epoch': 2} {'type': 'loss', 'content': 0.1157246008515358, 'timestamp': '2025-10-01 04:32:36.903407', 'step': 11607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:36.957535', 'step': 11607, 'epoch': 2} {'type': 'loss', 'content': 0.08026096969842911, 'timestamp': '2025-10-01 04:32:36.963895', 'step': 11608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:32:37.017985', 'step': 11608, 'epoch': 2} {'type': 'loss', 'content': 0.07185812294483185, 'timestamp': '2025-10-01 04:32:37.019838', 'step': 11609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:37.074841', 'step': 11609, 'epoch': 2} {'type': 'loss', 'content': 0.11049088835716248, 'timestamp': '2025-10-01 04:32:37.076834', 'step': 11610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:37.131576', 'step': 11610, 'epoch': 2} {'type': 'loss', 'content': 0.13554589450359344, 'timestamp': '2025-10-01 04:32:37.133998', 'step': 11611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:37.189217', 'step': 11611, 'epoch': 2} {'type': 'loss', 'content': 0.07666467875242233, 'timestamp': '2025-10-01 04:32:37.196173', 'step': 11612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:37.252879', 'step': 11612, 'epoch': 2} {'type': 'loss', 'content': 0.11144161224365234, 'timestamp': '2025-10-01 04:32:37.255506', 'step': 11613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:37.321540', 'step': 11613, 'epoch': 2} {'type': 'loss', 'content': 0.13172203302383423, 'timestamp': '2025-10-01 04:32:37.323742', 'step': 11614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:37.380973', 'step': 11614, 'epoch': 2} {'type': 'loss', 'content': 0.05945594981312752, 'timestamp': '2025-10-01 04:32:37.383222', 'step': 11615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:37.442892', 'step': 11615, 'epoch': 2} {'type': 'loss', 'content': 0.08648553490638733, 'timestamp': '2025-10-01 04:32:37.449765', 'step': 11616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:37.509617', 'step': 11616, 'epoch': 2} {'type': 'loss', 'content': 0.17013238370418549, 'timestamp': '2025-10-01 04:32:37.511530', 'step': 11617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:32:37.572527', 'step': 11617, 'epoch': 2} {'type': 'loss', 'content': 0.11719754338264465, 'timestamp': '2025-10-01 04:32:37.574916', 'step': 11618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:37.635672', 'step': 11618, 'epoch': 2} {'type': 'loss', 'content': 0.12390852719545364, 'timestamp': '2025-10-01 04:32:37.637893', 'step': 11619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:37.699704', 'step': 11619, 'epoch': 2} {'type': 'loss', 'content': 0.16226635873317719, 'timestamp': '2025-10-01 04:32:37.707111', 'step': 11620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:37.766163', 'step': 11620, 'epoch': 2} {'type': 'loss', 'content': 0.09113028645515442, 'timestamp': '2025-10-01 04:32:37.768629', 'step': 11621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:37.830600', 'step': 11621, 'epoch': 2} {'type': 'loss', 'content': 0.13702459633350372, 'timestamp': '2025-10-01 04:32:37.832627', 'step': 11622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:37.892947', 'step': 11622, 'epoch': 2} {'type': 'loss', 'content': 0.145846888422966, 'timestamp': '2025-10-01 04:32:37.894861', 'step': 11623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:32:37.957539', 'step': 11623, 'epoch': 2} {'type': 'loss', 'content': 0.045154720544815063, 'timestamp': '2025-10-01 04:32:37.964474', 'step': 11624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:38.025904', 'step': 11624, 'epoch': 2} {'type': 'loss', 'content': 0.0861324593424797, 'timestamp': '2025-10-01 04:32:38.028402', 'step': 11625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:38.088397', 'step': 11625, 'epoch': 2} {'type': 'loss', 'content': 0.14072343707084656, 'timestamp': '2025-10-01 04:32:38.090782', 'step': 11626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:38.152757', 'step': 11626, 'epoch': 2} {'type': 'loss', 'content': 0.12842392921447754, 'timestamp': '2025-10-01 04:32:38.155064', 'step': 11627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:38.216066', 'step': 11627, 'epoch': 2} {'type': 'loss', 'content': 0.059579379856586456, 'timestamp': '2025-10-01 04:32:38.223113', 'step': 11628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:32:38.284327', 'step': 11628, 'epoch': 2} {'type': 'loss', 'content': 0.05500655621290207, 'timestamp': '2025-10-01 04:32:38.286280', 'step': 11629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:38.346640', 'step': 11629, 'epoch': 2} {'type': 'loss', 'content': 0.09046414494514465, 'timestamp': '2025-10-01 04:32:38.348730', 'step': 11630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:38.421979', 'step': 11630, 'epoch': 2} {'type': 'loss', 'content': 0.07308848202228546, 'timestamp': '2025-10-01 04:32:38.424295', 'step': 11631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:38.485837', 'step': 11631, 'epoch': 2} {'type': 'loss', 'content': 0.08877679705619812, 'timestamp': '2025-10-01 04:32:38.493088', 'step': 11632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:38.553198', 'step': 11632, 'epoch': 2} {'type': 'loss', 'content': 0.09957064688205719, 'timestamp': '2025-10-01 04:32:38.555701', 'step': 11633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:38.616567', 'step': 11633, 'epoch': 2} {'type': 'loss', 'content': 0.06383664906024933, 'timestamp': '2025-10-01 04:32:38.618918', 'step': 11634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:38.681151', 'step': 11634, 'epoch': 2} {'type': 'loss', 'content': 0.07602278888225555, 'timestamp': '2025-10-01 04:32:38.683559', 'step': 11635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:38.743891', 'step': 11635, 'epoch': 2} {'type': 'loss', 'content': 0.07791241258382797, 'timestamp': '2025-10-01 04:32:38.751381', 'step': 11636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:38.812603', 'step': 11636, 'epoch': 2} {'type': 'loss', 'content': 0.12155212461948395, 'timestamp': '2025-10-01 04:32:38.814867', 'step': 11637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:38.875905', 'step': 11637, 'epoch': 2} {'type': 'loss', 'content': 0.13604797422885895, 'timestamp': '2025-10-01 04:32:38.878172', 'step': 11638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:38.939040', 'step': 11638, 'epoch': 2} {'type': 'loss', 'content': 0.15945567190647125, 'timestamp': '2025-10-01 04:32:38.941540', 'step': 11639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:39.003476', 'step': 11639, 'epoch': 2} {'type': 'loss', 'content': 0.10845187306404114, 'timestamp': '2025-10-01 04:32:39.010929', 'step': 11640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:39.069978', 'step': 11640, 'epoch': 2} {'type': 'loss', 'content': 0.04172985255718231, 'timestamp': '2025-10-01 04:32:39.072494', 'step': 11641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:39.134481', 'step': 11641, 'epoch': 2} {'type': 'loss', 'content': 0.10562939196825027, 'timestamp': '2025-10-01 04:32:39.136812', 'step': 11642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:39.197173', 'step': 11642, 'epoch': 2} {'type': 'loss', 'content': 0.18625210225582123, 'timestamp': '2025-10-01 04:32:39.199352', 'step': 11643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:39.255277', 'step': 11643, 'epoch': 2} {'type': 'loss', 'content': 0.08078078925609589, 'timestamp': '2025-10-01 04:32:39.261867', 'step': 11644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:39.317425', 'step': 11644, 'epoch': 2} {'type': 'loss', 'content': 0.11185145378112793, 'timestamp': '2025-10-01 04:32:39.319794', 'step': 11645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:39.374808', 'step': 11645, 'epoch': 2} {'type': 'loss', 'content': 0.07410828024148941, 'timestamp': '2025-10-01 04:32:39.376970', 'step': 11646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:39.431863', 'step': 11646, 'epoch': 2} {'type': 'loss', 'content': 0.1000668928027153, 'timestamp': '2025-10-01 04:32:39.434024', 'step': 11647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:39.488760', 'step': 11647, 'epoch': 2} {'type': 'loss', 'content': 0.07039085030555725, 'timestamp': '2025-10-01 04:32:39.494949', 'step': 11648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:39.549208', 'step': 11648, 'epoch': 2} {'type': 'loss', 'content': 0.05293174088001251, 'timestamp': '2025-10-01 04:32:39.552580', 'step': 11649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:39.606065', 'step': 11649, 'epoch': 2} {'type': 'loss', 'content': 0.15394103527069092, 'timestamp': '2025-10-01 04:32:39.608222', 'step': 11650, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-01 04:32:52.741432', 'step': 11650, 'epoch': 2} {'type': 'pplx', 'content': 13721.961947136353, 'timestamp': '2025-10-01 04:32:52.744919', 'step': 11650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:52.799715', 'step': 11650, 'epoch': 2} {'type': 'loss', 'content': 0.09476752579212189, 'timestamp': '2025-10-01 04:32:52.801848', 'step': 11651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:52.856109', 'step': 11651, 'epoch': 2} {'type': 'loss', 'content': 0.12108806520700455, 'timestamp': '2025-10-01 04:32:52.862254', 'step': 11652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:52.916072', 'step': 11652, 'epoch': 2} {'type': 'loss', 'content': 0.12137891352176666, 'timestamp': '2025-10-01 04:32:52.918156', 'step': 11653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:32:52.972307', 'step': 11653, 'epoch': 2} {'type': 'loss', 'content': 0.19316373765468597, 'timestamp': '2025-10-01 04:32:52.974143', 'step': 11654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:53.027431', 'step': 11654, 'epoch': 2} {'type': 'loss', 'content': 0.13745607435703278, 'timestamp': '2025-10-01 04:32:53.029507', 'step': 11655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:53.082922', 'step': 11655, 'epoch': 2} {'type': 'loss', 'content': 0.09189572930335999, 'timestamp': '2025-10-01 04:32:53.088712', 'step': 11656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:53.142309', 'step': 11656, 'epoch': 2} {'type': 'loss', 'content': 0.09603201597929001, 'timestamp': '2025-10-01 04:32:53.144317', 'step': 11657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:53.198798', 'step': 11657, 'epoch': 2} {'type': 'loss', 'content': 0.07218757271766663, 'timestamp': '2025-10-01 04:32:53.200869', 'step': 11658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:53.254852', 'step': 11658, 'epoch': 2} {'type': 'loss', 'content': 0.08892577141523361, 'timestamp': '2025-10-01 04:32:53.257058', 'step': 11659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:53.311115', 'step': 11659, 'epoch': 2} {'type': 'loss', 'content': 0.10246365517377853, 'timestamp': '2025-10-01 04:32:53.317111', 'step': 11660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:53.371129', 'step': 11660, 'epoch': 2} {'type': 'loss', 'content': 0.09765997529029846, 'timestamp': '2025-10-01 04:32:53.373262', 'step': 11661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:53.427606', 'step': 11661, 'epoch': 2} {'type': 'loss', 'content': 0.23467007279396057, 'timestamp': '2025-10-01 04:32:53.429882', 'step': 11662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:53.484142', 'step': 11662, 'epoch': 2} {'type': 'loss', 'content': 0.15973316133022308, 'timestamp': '2025-10-01 04:32:53.486300', 'step': 11663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:53.541406', 'step': 11663, 'epoch': 2} {'type': 'loss', 'content': 0.19122985005378723, 'timestamp': '2025-10-01 04:32:53.547415', 'step': 11664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:53.601116', 'step': 11664, 'epoch': 2} {'type': 'loss', 'content': 0.09414858371019363, 'timestamp': '2025-10-01 04:32:53.603134', 'step': 11665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:53.657626', 'step': 11665, 'epoch': 2} {'type': 'loss', 'content': 0.13048061728477478, 'timestamp': '2025-10-01 04:32:53.659839', 'step': 11666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:53.715322', 'step': 11666, 'epoch': 2} {'type': 'loss', 'content': 0.11442432552576065, 'timestamp': '2025-10-01 04:32:53.717544', 'step': 11667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:53.772851', 'step': 11667, 'epoch': 2} {'type': 'loss', 'content': 0.09021469950675964, 'timestamp': '2025-10-01 04:32:53.779227', 'step': 11668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:53.835104', 'step': 11668, 'epoch': 2} {'type': 'loss', 'content': 0.17643354833126068, 'timestamp': '2025-10-01 04:32:53.837126', 'step': 11669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:53.893018', 'step': 11669, 'epoch': 2} {'type': 'loss', 'content': 0.15486939251422882, 'timestamp': '2025-10-01 04:32:53.894977', 'step': 11670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:53.971752', 'step': 11670, 'epoch': 2} {'type': 'loss', 'content': 0.06077186390757561, 'timestamp': '2025-10-01 04:32:53.973840', 'step': 11671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:54.031660', 'step': 11671, 'epoch': 2} {'type': 'loss', 'content': 0.23745954036712646, 'timestamp': '2025-10-01 04:32:54.038037', 'step': 11672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:54.094134', 'step': 11672, 'epoch': 2} {'type': 'loss', 'content': 0.10392706096172333, 'timestamp': '2025-10-01 04:32:54.096058', 'step': 11673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:54.154948', 'step': 11673, 'epoch': 2} {'type': 'loss', 'content': 0.06985285878181458, 'timestamp': '2025-10-01 04:32:54.157346', 'step': 11674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:54.214224', 'step': 11674, 'epoch': 2} {'type': 'loss', 'content': 0.1486348956823349, 'timestamp': '2025-10-01 04:32:54.216602', 'step': 11675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:54.279682', 'step': 11675, 'epoch': 2} {'type': 'loss', 'content': 0.1473754197359085, 'timestamp': '2025-10-01 04:32:54.286345', 'step': 11676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:54.343277', 'step': 11676, 'epoch': 2} {'type': 'loss', 'content': 0.13931255042552948, 'timestamp': '2025-10-01 04:32:54.345637', 'step': 11677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:54.402344', 'step': 11677, 'epoch': 2} {'type': 'loss', 'content': 0.1468525528907776, 'timestamp': '2025-10-01 04:32:54.404591', 'step': 11678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:54.461582', 'step': 11678, 'epoch': 2} {'type': 'loss', 'content': 0.16062210500240326, 'timestamp': '2025-10-01 04:32:54.463843', 'step': 11679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:32:54.521903', 'step': 11679, 'epoch': 2} {'type': 'loss', 'content': 0.11389290541410446, 'timestamp': '2025-10-01 04:32:54.528850', 'step': 11680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:54.584543', 'step': 11680, 'epoch': 2} {'type': 'loss', 'content': 0.0746440440416336, 'timestamp': '2025-10-01 04:32:54.586750', 'step': 11681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:54.651324', 'step': 11681, 'epoch': 2} {'type': 'loss', 'content': 0.10253772139549255, 'timestamp': '2025-10-01 04:32:54.653495', 'step': 11682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:54.709861', 'step': 11682, 'epoch': 2} {'type': 'loss', 'content': 0.17208486795425415, 'timestamp': '2025-10-01 04:32:54.713462', 'step': 11683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:54.768717', 'step': 11683, 'epoch': 2} {'type': 'loss', 'content': 0.1381773054599762, 'timestamp': '2025-10-01 04:32:54.775377', 'step': 11684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:54.831601', 'step': 11684, 'epoch': 2} {'type': 'loss', 'content': 0.09340818226337433, 'timestamp': '2025-10-01 04:32:54.833786', 'step': 11685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:54.888963', 'step': 11685, 'epoch': 2} {'type': 'loss', 'content': 0.16786594688892365, 'timestamp': '2025-10-01 04:32:54.891297', 'step': 11686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:54.946709', 'step': 11686, 'epoch': 2} {'type': 'loss', 'content': 0.1818077117204666, 'timestamp': '2025-10-01 04:32:54.948785', 'step': 11687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:55.004053', 'step': 11687, 'epoch': 2} {'type': 'loss', 'content': 0.14272086322307587, 'timestamp': '2025-10-01 04:32:55.010602', 'step': 11688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:55.065908', 'step': 11688, 'epoch': 2} {'type': 'loss', 'content': 0.05098447948694229, 'timestamp': '2025-10-01 04:32:55.067926', 'step': 11689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:55.122712', 'step': 11689, 'epoch': 2} {'type': 'loss', 'content': 0.1126626506447792, 'timestamp': '2025-10-01 04:32:55.124905', 'step': 11690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:55.178948', 'step': 11690, 'epoch': 2} {'type': 'loss', 'content': 0.08454377949237823, 'timestamp': '2025-10-01 04:32:55.181150', 'step': 11691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:55.234679', 'step': 11691, 'epoch': 2} {'type': 'loss', 'content': 0.12267686426639557, 'timestamp': '2025-10-01 04:32:55.240847', 'step': 11692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:55.294237', 'step': 11692, 'epoch': 2} {'type': 'loss', 'content': 0.1465354859828949, 'timestamp': '2025-10-01 04:32:55.296236', 'step': 11693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:55.349641', 'step': 11693, 'epoch': 2} {'type': 'loss', 'content': 0.18493211269378662, 'timestamp': '2025-10-01 04:32:55.351624', 'step': 11694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:55.405762', 'step': 11694, 'epoch': 2} {'type': 'loss', 'content': 0.21159985661506653, 'timestamp': '2025-10-01 04:32:55.407795', 'step': 11695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:55.460868', 'step': 11695, 'epoch': 2} {'type': 'loss', 'content': 0.1789955347776413, 'timestamp': '2025-10-01 04:32:55.466653', 'step': 11696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:55.520207', 'step': 11696, 'epoch': 2} {'type': 'loss', 'content': 0.11389568448066711, 'timestamp': '2025-10-01 04:32:55.522408', 'step': 11697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:55.584256', 'step': 11697, 'epoch': 2} {'type': 'loss', 'content': 0.12032142281532288, 'timestamp': '2025-10-01 04:32:55.586338', 'step': 11698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:55.640929', 'step': 11698, 'epoch': 2} {'type': 'loss', 'content': 0.08805502951145172, 'timestamp': '2025-10-01 04:32:55.642843', 'step': 11699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:55.696468', 'step': 11699, 'epoch': 2} {'type': 'loss', 'content': 0.019348278641700745, 'timestamp': '2025-10-01 04:32:55.702252', 'step': 11700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:55.755478', 'step': 11700, 'epoch': 2} {'type': 'loss', 'content': 0.05247516185045242, 'timestamp': '2025-10-01 04:32:55.757396', 'step': 11701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:55.811049', 'step': 11701, 'epoch': 2} {'type': 'loss', 'content': 0.07800433039665222, 'timestamp': '2025-10-01 04:32:55.813228', 'step': 11702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:55.867523', 'step': 11702, 'epoch': 2} {'type': 'loss', 'content': 0.16629961133003235, 'timestamp': '2025-10-01 04:32:55.869773', 'step': 11703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:55.923980', 'step': 11703, 'epoch': 2} {'type': 'loss', 'content': 0.1549571305513382, 'timestamp': '2025-10-01 04:32:55.929809', 'step': 11704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:55.982982', 'step': 11704, 'epoch': 2} {'type': 'loss', 'content': 0.09129330515861511, 'timestamp': '2025-10-01 04:32:55.985292', 'step': 11705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:56.038907', 'step': 11705, 'epoch': 2} {'type': 'loss', 'content': 0.12904001772403717, 'timestamp': '2025-10-01 04:32:56.041222', 'step': 11706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:56.094862', 'step': 11706, 'epoch': 2} {'type': 'loss', 'content': 0.21970997750759125, 'timestamp': '2025-10-01 04:32:56.096829', 'step': 11707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:56.149716', 'step': 11707, 'epoch': 2} {'type': 'loss', 'content': 0.11976544559001923, 'timestamp': '2025-10-01 04:32:56.155549', 'step': 11708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:56.208349', 'step': 11708, 'epoch': 2} {'type': 'loss', 'content': 0.21475623548030853, 'timestamp': '2025-10-01 04:32:56.210522', 'step': 11709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:56.263867', 'step': 11709, 'epoch': 2} {'type': 'loss', 'content': 0.21816067397594452, 'timestamp': '2025-10-01 04:32:56.265981', 'step': 11710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:56.319613', 'step': 11710, 'epoch': 2} {'type': 'loss', 'content': 0.12456290423870087, 'timestamp': '2025-10-01 04:32:56.321819', 'step': 11711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:56.375439', 'step': 11711, 'epoch': 2} {'type': 'loss', 'content': 0.09091225266456604, 'timestamp': '2025-10-01 04:32:56.381582', 'step': 11712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:56.434423', 'step': 11712, 'epoch': 2} {'type': 'loss', 'content': 0.09818383306264877, 'timestamp': '2025-10-01 04:32:56.436479', 'step': 11713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:56.491180', 'step': 11713, 'epoch': 2} {'type': 'loss', 'content': 0.1424194872379303, 'timestamp': '2025-10-01 04:32:56.493088', 'step': 11714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:56.546582', 'step': 11714, 'epoch': 2} {'type': 'loss', 'content': 0.13670530915260315, 'timestamp': '2025-10-01 04:32:56.548807', 'step': 11715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:32:56.602088', 'step': 11715, 'epoch': 2} {'type': 'loss', 'content': 0.07455812394618988, 'timestamp': '2025-10-01 04:32:56.607832', 'step': 11716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:32:56.661476', 'step': 11716, 'epoch': 2} {'type': 'loss', 'content': 0.16053882241249084, 'timestamp': '2025-10-01 04:32:56.663705', 'step': 11717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:56.717595', 'step': 11717, 'epoch': 2} {'type': 'loss', 'content': 0.19844584167003632, 'timestamp': '2025-10-01 04:32:56.719803', 'step': 11718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:56.772757', 'step': 11718, 'epoch': 2} {'type': 'loss', 'content': 0.10835962742567062, 'timestamp': '2025-10-01 04:32:56.774938', 'step': 11719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:56.828263', 'step': 11719, 'epoch': 2} {'type': 'loss', 'content': 0.10758309811353683, 'timestamp': '2025-10-01 04:32:56.833956', 'step': 11720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:56.887263', 'step': 11720, 'epoch': 2} {'type': 'loss', 'content': 0.17800851166248322, 'timestamp': '2025-10-01 04:32:56.889538', 'step': 11721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:56.943134', 'step': 11721, 'epoch': 2} {'type': 'loss', 'content': 0.12343668937683105, 'timestamp': '2025-10-01 04:32:56.945270', 'step': 11722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:56.999277', 'step': 11722, 'epoch': 2} {'type': 'loss', 'content': 0.14680826663970947, 'timestamp': '2025-10-01 04:32:57.001250', 'step': 11723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:57.054791', 'step': 11723, 'epoch': 2} {'type': 'loss', 'content': 0.08613672852516174, 'timestamp': '2025-10-01 04:32:57.060438', 'step': 11724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:57.113181', 'step': 11724, 'epoch': 2} {'type': 'loss', 'content': 0.08447171747684479, 'timestamp': '2025-10-01 04:32:57.115540', 'step': 11725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:57.170500', 'step': 11725, 'epoch': 2} {'type': 'loss', 'content': 0.2138407677412033, 'timestamp': '2025-10-01 04:32:57.172784', 'step': 11726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:32:57.228121', 'step': 11726, 'epoch': 2} {'type': 'loss', 'content': 0.0750737115740776, 'timestamp': '2025-10-01 04:32:57.230817', 'step': 11727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:57.285761', 'step': 11727, 'epoch': 2} {'type': 'loss', 'content': 0.15527111291885376, 'timestamp': '2025-10-01 04:32:57.291837', 'step': 11728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:57.346483', 'step': 11728, 'epoch': 2} {'type': 'loss', 'content': 0.1655784696340561, 'timestamp': '2025-10-01 04:32:57.348712', 'step': 11729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:57.403871', 'step': 11729, 'epoch': 2} {'type': 'loss', 'content': 0.07781437784433365, 'timestamp': '2025-10-01 04:32:57.406106', 'step': 11730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:57.460720', 'step': 11730, 'epoch': 2} {'type': 'loss', 'content': 0.10718166083097458, 'timestamp': '2025-10-01 04:32:57.463109', 'step': 11731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:57.518013', 'step': 11731, 'epoch': 2} {'type': 'loss', 'content': 0.13530273735523224, 'timestamp': '2025-10-01 04:32:57.524077', 'step': 11732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:57.578027', 'step': 11732, 'epoch': 2} {'type': 'loss', 'content': 0.14308850467205048, 'timestamp': '2025-10-01 04:32:57.581528', 'step': 11733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:57.636312', 'step': 11733, 'epoch': 2} {'type': 'loss', 'content': 0.17690008878707886, 'timestamp': '2025-10-01 04:32:57.638938', 'step': 11734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:57.693583', 'step': 11734, 'epoch': 2} {'type': 'loss', 'content': 0.10372868180274963, 'timestamp': '2025-10-01 04:32:57.695898', 'step': 11735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:57.750135', 'step': 11735, 'epoch': 2} {'type': 'loss', 'content': 0.11222831904888153, 'timestamp': '2025-10-01 04:32:57.756364', 'step': 11736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:57.810198', 'step': 11736, 'epoch': 2} {'type': 'loss', 'content': 0.09606954455375671, 'timestamp': '2025-10-01 04:32:57.812309', 'step': 11737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:57.865598', 'step': 11737, 'epoch': 2} {'type': 'loss', 'content': 0.059199873358011246, 'timestamp': '2025-10-01 04:32:57.867875', 'step': 11738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:57.929184', 'step': 11738, 'epoch': 2} {'type': 'loss', 'content': 0.12709370255470276, 'timestamp': '2025-10-01 04:32:57.931567', 'step': 11739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:57.986135', 'step': 11739, 'epoch': 2} {'type': 'loss', 'content': 0.1950998306274414, 'timestamp': '2025-10-01 04:32:57.992376', 'step': 11740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:58.045735', 'step': 11740, 'epoch': 2} {'type': 'loss', 'content': 0.11165890842676163, 'timestamp': '2025-10-01 04:32:58.048296', 'step': 11741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:58.102551', 'step': 11741, 'epoch': 2} {'type': 'loss', 'content': 0.05713306739926338, 'timestamp': '2025-10-01 04:32:58.105063', 'step': 11742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:58.159678', 'step': 11742, 'epoch': 2} {'type': 'loss', 'content': 0.09688197076320648, 'timestamp': '2025-10-01 04:32:58.161783', 'step': 11743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:58.215319', 'step': 11743, 'epoch': 2} {'type': 'loss', 'content': 0.13209617137908936, 'timestamp': '2025-10-01 04:32:58.221552', 'step': 11744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:58.275199', 'step': 11744, 'epoch': 2} {'type': 'loss', 'content': 0.13658007979393005, 'timestamp': '2025-10-01 04:32:58.277250', 'step': 11745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:58.330752', 'step': 11745, 'epoch': 2} {'type': 'loss', 'content': 0.10356532037258148, 'timestamp': '2025-10-01 04:32:58.333438', 'step': 11746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:32:58.390686', 'step': 11746, 'epoch': 2} {'type': 'loss', 'content': 0.15676608681678772, 'timestamp': '2025-10-01 04:32:58.392925', 'step': 11747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:58.447253', 'step': 11747, 'epoch': 2} {'type': 'loss', 'content': 0.10989505052566528, 'timestamp': '2025-10-01 04:32:58.453218', 'step': 11748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:58.506164', 'step': 11748, 'epoch': 2} {'type': 'loss', 'content': 0.0958314761519432, 'timestamp': '2025-10-01 04:32:58.508297', 'step': 11749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:58.561719', 'step': 11749, 'epoch': 2} {'type': 'loss', 'content': 0.22518962621688843, 'timestamp': '2025-10-01 04:32:58.563624', 'step': 11750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:58.616720', 'step': 11750, 'epoch': 2} {'type': 'loss', 'content': 0.0873904675245285, 'timestamp': '2025-10-01 04:32:58.618915', 'step': 11751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:58.671999', 'step': 11751, 'epoch': 2} {'type': 'loss', 'content': 0.08601907640695572, 'timestamp': '2025-10-01 04:32:58.677846', 'step': 11752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:58.730115', 'step': 11752, 'epoch': 2} {'type': 'loss', 'content': 0.21632297337055206, 'timestamp': '2025-10-01 04:32:58.732176', 'step': 11753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:58.785433', 'step': 11753, 'epoch': 2} {'type': 'loss', 'content': 0.09078057110309601, 'timestamp': '2025-10-01 04:32:58.787433', 'step': 11754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:32:58.841830', 'step': 11754, 'epoch': 2} {'type': 'loss', 'content': 0.0928230732679367, 'timestamp': '2025-10-01 04:32:58.844080', 'step': 11755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:58.897536', 'step': 11755, 'epoch': 2} {'type': 'loss', 'content': 0.1252906173467636, 'timestamp': '2025-10-01 04:32:58.903018', 'step': 11756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:58.955532', 'step': 11756, 'epoch': 2} {'type': 'loss', 'content': 0.14407896995544434, 'timestamp': '2025-10-01 04:32:58.957576', 'step': 11757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:32:59.010586', 'step': 11757, 'epoch': 2} {'type': 'loss', 'content': 0.11184699833393097, 'timestamp': '2025-10-01 04:32:59.012703', 'step': 11758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:59.066084', 'step': 11758, 'epoch': 2} {'type': 'loss', 'content': 0.19276821613311768, 'timestamp': '2025-10-01 04:32:59.068034', 'step': 11759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:59.120738', 'step': 11759, 'epoch': 2} {'type': 'loss', 'content': 0.13774308562278748, 'timestamp': '2025-10-01 04:32:59.126439', 'step': 11760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:32:59.181197', 'step': 11760, 'epoch': 2} {'type': 'loss', 'content': 0.16346238553524017, 'timestamp': '2025-10-01 04:32:59.185563', 'step': 11761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:32:59.241974', 'step': 11761, 'epoch': 2} {'type': 'loss', 'content': 0.08069238066673279, 'timestamp': '2025-10-01 04:32:59.244040', 'step': 11762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:59.297781', 'step': 11762, 'epoch': 2} {'type': 'loss', 'content': 0.16238994896411896, 'timestamp': '2025-10-01 04:32:59.300458', 'step': 11763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:59.354148', 'step': 11763, 'epoch': 2} {'type': 'loss', 'content': 0.10514393448829651, 'timestamp': '2025-10-01 04:32:59.359754', 'step': 11764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:59.413693', 'step': 11764, 'epoch': 2} {'type': 'loss', 'content': 0.16492147743701935, 'timestamp': '2025-10-01 04:32:59.415728', 'step': 11765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:59.468405', 'step': 11765, 'epoch': 2} {'type': 'loss', 'content': 0.06633758544921875, 'timestamp': '2025-10-01 04:32:59.470396', 'step': 11766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:32:59.523558', 'step': 11766, 'epoch': 2} {'type': 'loss', 'content': 0.10200756043195724, 'timestamp': '2025-10-01 04:32:59.526071', 'step': 11767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:32:59.579651', 'step': 11767, 'epoch': 2} {'type': 'loss', 'content': 0.13531732559204102, 'timestamp': '2025-10-01 04:32:59.585154', 'step': 11768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:59.638188', 'step': 11768, 'epoch': 2} {'type': 'loss', 'content': 0.12198664247989655, 'timestamp': '2025-10-01 04:32:59.640250', 'step': 11769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:59.694460', 'step': 11769, 'epoch': 2} {'type': 'loss', 'content': 0.15910665690898895, 'timestamp': '2025-10-01 04:32:59.696454', 'step': 11770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:32:59.749842', 'step': 11770, 'epoch': 2} {'type': 'loss', 'content': 0.025252429768443108, 'timestamp': '2025-10-01 04:32:59.754834', 'step': 11771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:59.809004', 'step': 11771, 'epoch': 2} {'type': 'loss', 'content': 0.12776407599449158, 'timestamp': '2025-10-01 04:32:59.814787', 'step': 11772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:59.867831', 'step': 11772, 'epoch': 2} {'type': 'loss', 'content': 0.08653868734836578, 'timestamp': '2025-10-01 04:32:59.869685', 'step': 11773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:32:59.922898', 'step': 11773, 'epoch': 2} {'type': 'loss', 'content': 0.18729223310947418, 'timestamp': '2025-10-01 04:32:59.925080', 'step': 11774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:32:59.978129', 'step': 11774, 'epoch': 2} {'type': 'loss', 'content': 0.18077875673770905, 'timestamp': '2025-10-01 04:32:59.980025', 'step': 11775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:00.032996', 'step': 11775, 'epoch': 2} {'type': 'loss', 'content': 0.09904889762401581, 'timestamp': '2025-10-01 04:33:00.038729', 'step': 11776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:00.091481', 'step': 11776, 'epoch': 2} {'type': 'loss', 'content': 0.2334681898355484, 'timestamp': '2025-10-01 04:33:00.093744', 'step': 11777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:33:00.146942', 'step': 11777, 'epoch': 2} {'type': 'loss', 'content': 0.07102935761213303, 'timestamp': '2025-10-01 04:33:00.149148', 'step': 11778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:00.209385', 'step': 11778, 'epoch': 2} {'type': 'loss', 'content': 0.10451114922761917, 'timestamp': '2025-10-01 04:33:00.211392', 'step': 11779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:00.264078', 'step': 11779, 'epoch': 2} {'type': 'loss', 'content': 0.1494292914867401, 'timestamp': '2025-10-01 04:33:00.269681', 'step': 11780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:00.321998', 'step': 11780, 'epoch': 2} {'type': 'loss', 'content': 0.09484227001667023, 'timestamp': '2025-10-01 04:33:00.323875', 'step': 11781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:00.376154', 'step': 11781, 'epoch': 2} {'type': 'loss', 'content': 0.14164569973945618, 'timestamp': '2025-10-01 04:33:00.378298', 'step': 11782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:00.431457', 'step': 11782, 'epoch': 2} {'type': 'loss', 'content': 0.1554948389530182, 'timestamp': '2025-10-01 04:33:00.433605', 'step': 11783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:33:00.492205', 'step': 11783, 'epoch': 2} {'type': 'loss', 'content': 0.1678490936756134, 'timestamp': '2025-10-01 04:33:00.497718', 'step': 11784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:00.549938', 'step': 11784, 'epoch': 2} {'type': 'loss', 'content': 0.10836146026849747, 'timestamp': '2025-10-01 04:33:00.552872', 'step': 11785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:00.606144', 'step': 11785, 'epoch': 2} {'type': 'loss', 'content': 0.1107201874256134, 'timestamp': '2025-10-01 04:33:00.608412', 'step': 11786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:00.665933', 'step': 11786, 'epoch': 2} {'type': 'loss', 'content': 0.19877435266971588, 'timestamp': '2025-10-01 04:33:00.667853', 'step': 11787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:00.720541', 'step': 11787, 'epoch': 2} {'type': 'loss', 'content': 0.11259885877370834, 'timestamp': '2025-10-01 04:33:00.726201', 'step': 11788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:00.778457', 'step': 11788, 'epoch': 2} {'type': 'loss', 'content': 0.11629520356655121, 'timestamp': '2025-10-01 04:33:00.780479', 'step': 11789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:00.833097', 'step': 11789, 'epoch': 2} {'type': 'loss', 'content': 0.18039321899414062, 'timestamp': '2025-10-01 04:33:00.835143', 'step': 11790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:00.888530', 'step': 11790, 'epoch': 2} {'type': 'loss', 'content': 0.18425387144088745, 'timestamp': '2025-10-01 04:33:00.890754', 'step': 11791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:00.944925', 'step': 11791, 'epoch': 2} {'type': 'loss', 'content': 0.0719665139913559, 'timestamp': '2025-10-01 04:33:00.950670', 'step': 11792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:01.003353', 'step': 11792, 'epoch': 2} {'type': 'loss', 'content': 0.10549471527338028, 'timestamp': '2025-10-01 04:33:01.005533', 'step': 11793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:01.058358', 'step': 11793, 'epoch': 2} {'type': 'loss', 'content': 0.07615090161561966, 'timestamp': '2025-10-01 04:33:01.060324', 'step': 11794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:01.113256', 'step': 11794, 'epoch': 2} {'type': 'loss', 'content': 0.09857071936130524, 'timestamp': '2025-10-01 04:33:01.115438', 'step': 11795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:01.167978', 'step': 11795, 'epoch': 2} {'type': 'loss', 'content': 0.13024640083312988, 'timestamp': '2025-10-01 04:33:01.173520', 'step': 11796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:01.226114', 'step': 11796, 'epoch': 2} {'type': 'loss', 'content': 0.09079106897115707, 'timestamp': '2025-10-01 04:33:01.231930', 'step': 11797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:01.285641', 'step': 11797, 'epoch': 2} {'type': 'loss', 'content': 0.11504412442445755, 'timestamp': '2025-10-01 04:33:01.289134', 'step': 11798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:01.346763', 'step': 11798, 'epoch': 2} {'type': 'loss', 'content': 0.17538462579250336, 'timestamp': '2025-10-01 04:33:01.349033', 'step': 11799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:01.401898', 'step': 11799, 'epoch': 2} {'type': 'loss', 'content': 0.10117929428815842, 'timestamp': '2025-10-01 04:33:01.408328', 'step': 11800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:01.470291', 'step': 11800, 'epoch': 2} {'type': 'loss', 'content': 0.10527420043945312, 'timestamp': '2025-10-01 04:33:01.472319', 'step': 11801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:01.529221', 'step': 11801, 'epoch': 2} {'type': 'loss', 'content': 0.1407487988471985, 'timestamp': '2025-10-01 04:33:01.531283', 'step': 11802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:01.584348', 'step': 11802, 'epoch': 2} {'type': 'loss', 'content': 0.11703722923994064, 'timestamp': '2025-10-01 04:33:01.586553', 'step': 11803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:01.638989', 'step': 11803, 'epoch': 2} {'type': 'loss', 'content': 0.13854767382144928, 'timestamp': '2025-10-01 04:33:01.644728', 'step': 11804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:01.696844', 'step': 11804, 'epoch': 2} {'type': 'loss', 'content': 0.07441060245037079, 'timestamp': '2025-10-01 04:33:01.699149', 'step': 11805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:01.752092', 'step': 11805, 'epoch': 2} {'type': 'loss', 'content': 0.15324951708316803, 'timestamp': '2025-10-01 04:33:01.756657', 'step': 11806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:01.810222', 'step': 11806, 'epoch': 2} {'type': 'loss', 'content': 0.1496295928955078, 'timestamp': '2025-10-01 04:33:01.812437', 'step': 11807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:01.866371', 'step': 11807, 'epoch': 2} {'type': 'loss', 'content': 0.16577240824699402, 'timestamp': '2025-10-01 04:33:01.872204', 'step': 11808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:01.925664', 'step': 11808, 'epoch': 2} {'type': 'loss', 'content': 0.10710862278938293, 'timestamp': '2025-10-01 04:33:01.927776', 'step': 11809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:01.980802', 'step': 11809, 'epoch': 2} {'type': 'loss', 'content': 0.16679488122463226, 'timestamp': '2025-10-01 04:33:01.982879', 'step': 11810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:02.035536', 'step': 11810, 'epoch': 2} {'type': 'loss', 'content': 0.10661487281322479, 'timestamp': '2025-10-01 04:33:02.037766', 'step': 11811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:02.093405', 'step': 11811, 'epoch': 2} {'type': 'loss', 'content': 0.128978431224823, 'timestamp': '2025-10-01 04:33:02.099054', 'step': 11812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:02.151825', 'step': 11812, 'epoch': 2} {'type': 'loss', 'content': 0.11885733902454376, 'timestamp': '2025-10-01 04:33:02.153941', 'step': 11813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:02.207641', 'step': 11813, 'epoch': 2} {'type': 'loss', 'content': 0.11019236594438553, 'timestamp': '2025-10-01 04:33:02.210596', 'step': 11814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:02.267652', 'step': 11814, 'epoch': 2} {'type': 'loss', 'content': 0.19859126210212708, 'timestamp': '2025-10-01 04:33:02.269592', 'step': 11815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:02.322099', 'step': 11815, 'epoch': 2} {'type': 'loss', 'content': 0.04385410249233246, 'timestamp': '2025-10-01 04:33:02.327790', 'step': 11816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:02.380506', 'step': 11816, 'epoch': 2} {'type': 'loss', 'content': 0.20623211562633514, 'timestamp': '2025-10-01 04:33:02.382743', 'step': 11817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:02.435812', 'step': 11817, 'epoch': 2} {'type': 'loss', 'content': 0.13778534531593323, 'timestamp': '2025-10-01 04:33:02.437892', 'step': 11818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:02.490908', 'step': 11818, 'epoch': 2} {'type': 'loss', 'content': 0.20921677350997925, 'timestamp': '2025-10-01 04:33:02.493518', 'step': 11819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:02.546490', 'step': 11819, 'epoch': 2} {'type': 'loss', 'content': 0.03687352314591408, 'timestamp': '2025-10-01 04:33:02.552289', 'step': 11820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:02.607562', 'step': 11820, 'epoch': 2} {'type': 'loss', 'content': 0.03330956771969795, 'timestamp': '2025-10-01 04:33:02.609876', 'step': 11821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:02.662918', 'step': 11821, 'epoch': 2} {'type': 'loss', 'content': 0.17739450931549072, 'timestamp': '2025-10-01 04:33:02.665190', 'step': 11822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:02.718377', 'step': 11822, 'epoch': 2} {'type': 'loss', 'content': 0.06743577867746353, 'timestamp': '2025-10-01 04:33:02.720374', 'step': 11823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:33:02.773868', 'step': 11823, 'epoch': 2} {'type': 'loss', 'content': 0.13881021738052368, 'timestamp': '2025-10-01 04:33:02.779474', 'step': 11824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:02.831922', 'step': 11824, 'epoch': 2} {'type': 'loss', 'content': 0.12115995585918427, 'timestamp': '2025-10-01 04:33:02.834158', 'step': 11825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:02.886924', 'step': 11825, 'epoch': 2} {'type': 'loss', 'content': 0.1556536704301834, 'timestamp': '2025-10-01 04:33:02.888997', 'step': 11826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:02.941729', 'step': 11826, 'epoch': 2} {'type': 'loss', 'content': 0.10538456588983536, 'timestamp': '2025-10-01 04:33:02.943921', 'step': 11827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:02.997300', 'step': 11827, 'epoch': 2} {'type': 'loss', 'content': 0.21716392040252686, 'timestamp': '2025-10-01 04:33:03.002833', 'step': 11828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:03.055272', 'step': 11828, 'epoch': 2} {'type': 'loss', 'content': 0.09682567417621613, 'timestamp': '2025-10-01 04:33:03.057662', 'step': 11829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:03.110494', 'step': 11829, 'epoch': 2} {'type': 'loss', 'content': 0.25858545303344727, 'timestamp': '2025-10-01 04:33:03.112817', 'step': 11830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:03.166384', 'step': 11830, 'epoch': 2} {'type': 'loss', 'content': 0.1328633427619934, 'timestamp': '2025-10-01 04:33:03.168570', 'step': 11831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:03.221600', 'step': 11831, 'epoch': 2} {'type': 'loss', 'content': 0.199445441365242, 'timestamp': '2025-10-01 04:33:03.227256', 'step': 11832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:03.279644', 'step': 11832, 'epoch': 2} {'type': 'loss', 'content': 0.1101735457777977, 'timestamp': '2025-10-01 04:33:03.281971', 'step': 11833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:03.335054', 'step': 11833, 'epoch': 2} {'type': 'loss', 'content': 0.22212690114974976, 'timestamp': '2025-10-01 04:33:03.337316', 'step': 11834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:03.390201', 'step': 11834, 'epoch': 2} {'type': 'loss', 'content': 0.2063620388507843, 'timestamp': '2025-10-01 04:33:03.392401', 'step': 11835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:03.445320', 'step': 11835, 'epoch': 2} {'type': 'loss', 'content': 0.07678663730621338, 'timestamp': '2025-10-01 04:33:03.451031', 'step': 11836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:33:03.503941', 'step': 11836, 'epoch': 2} {'type': 'loss', 'content': 0.07142170518636703, 'timestamp': '2025-10-01 04:33:03.505947', 'step': 11837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:03.558831', 'step': 11837, 'epoch': 2} {'type': 'loss', 'content': 0.0982169508934021, 'timestamp': '2025-10-01 04:33:03.560938', 'step': 11838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:03.615078', 'step': 11838, 'epoch': 2} {'type': 'loss', 'content': 0.1495760977268219, 'timestamp': '2025-10-01 04:33:03.617117', 'step': 11839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:03.670585', 'step': 11839, 'epoch': 2} {'type': 'loss', 'content': 0.119914211332798, 'timestamp': '2025-10-01 04:33:03.676486', 'step': 11840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:03.729588', 'step': 11840, 'epoch': 2} {'type': 'loss', 'content': 0.20622216165065765, 'timestamp': '2025-10-01 04:33:03.731939', 'step': 11841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:03.785586', 'step': 11841, 'epoch': 2} {'type': 'loss', 'content': 0.2040736973285675, 'timestamp': '2025-10-01 04:33:03.787902', 'step': 11842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:03.841526', 'step': 11842, 'epoch': 2} {'type': 'loss', 'content': 0.10866380482912064, 'timestamp': '2025-10-01 04:33:03.843699', 'step': 11843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:03.897919', 'step': 11843, 'epoch': 2} {'type': 'loss', 'content': 0.05308579280972481, 'timestamp': '2025-10-01 04:33:03.903499', 'step': 11844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:03.956074', 'step': 11844, 'epoch': 2} {'type': 'loss', 'content': 0.14969664812088013, 'timestamp': '2025-10-01 04:33:03.958368', 'step': 11845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:04.011377', 'step': 11845, 'epoch': 2} {'type': 'loss', 'content': 0.15565992891788483, 'timestamp': '2025-10-01 04:33:04.013494', 'step': 11846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:04.066599', 'step': 11846, 'epoch': 2} {'type': 'loss', 'content': 0.16222670674324036, 'timestamp': '2025-10-01 04:33:04.068821', 'step': 11847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:04.121987', 'step': 11847, 'epoch': 2} {'type': 'loss', 'content': 0.04216403141617775, 'timestamp': '2025-10-01 04:33:04.127655', 'step': 11848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:04.180711', 'step': 11848, 'epoch': 2} {'type': 'loss', 'content': 0.12075263261795044, 'timestamp': '2025-10-01 04:33:04.183008', 'step': 11849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:04.236906', 'step': 11849, 'epoch': 2} {'type': 'loss', 'content': 0.0992540717124939, 'timestamp': '2025-10-01 04:33:04.239449', 'step': 11850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:04.293035', 'step': 11850, 'epoch': 2} {'type': 'loss', 'content': 0.12494337558746338, 'timestamp': '2025-10-01 04:33:04.295166', 'step': 11851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:04.347756', 'step': 11851, 'epoch': 2} {'type': 'loss', 'content': 0.1173689216375351, 'timestamp': '2025-10-01 04:33:04.353647', 'step': 11852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:04.409194', 'step': 11852, 'epoch': 2} {'type': 'loss', 'content': 0.05385630577802658, 'timestamp': '2025-10-01 04:33:04.411037', 'step': 11853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:04.463850', 'step': 11853, 'epoch': 2} {'type': 'loss', 'content': 0.14080817997455597, 'timestamp': '2025-10-01 04:33:04.465955', 'step': 11854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:04.519003', 'step': 11854, 'epoch': 2} {'type': 'loss', 'content': 0.11017388105392456, 'timestamp': '2025-10-01 04:33:04.521354', 'step': 11855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:04.574397', 'step': 11855, 'epoch': 2} {'type': 'loss', 'content': 0.1780085563659668, 'timestamp': '2025-10-01 04:33:04.580068', 'step': 11856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:04.632149', 'step': 11856, 'epoch': 2} {'type': 'loss', 'content': 0.11752882599830627, 'timestamp': '2025-10-01 04:33:04.634323', 'step': 11857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:04.687425', 'step': 11857, 'epoch': 2} {'type': 'loss', 'content': 0.11636963486671448, 'timestamp': '2025-10-01 04:33:04.691350', 'step': 11858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:04.745342', 'step': 11858, 'epoch': 2} {'type': 'loss', 'content': 0.1332513839006424, 'timestamp': '2025-10-01 04:33:04.747874', 'step': 11859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:04.801797', 'step': 11859, 'epoch': 2} {'type': 'loss', 'content': 0.15753048658370972, 'timestamp': '2025-10-01 04:33:04.807979', 'step': 11860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:04.860988', 'step': 11860, 'epoch': 2} {'type': 'loss', 'content': 0.13110756874084473, 'timestamp': '2025-10-01 04:33:04.863555', 'step': 11861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:04.917196', 'step': 11861, 'epoch': 2} {'type': 'loss', 'content': 0.224889874458313, 'timestamp': '2025-10-01 04:33:04.921242', 'step': 11862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:33:04.977587', 'step': 11862, 'epoch': 2} {'type': 'loss', 'content': 0.15961527824401855, 'timestamp': '2025-10-01 04:33:04.979809', 'step': 11863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:05.033894', 'step': 11863, 'epoch': 2} {'type': 'loss', 'content': 0.12869279086589813, 'timestamp': '2025-10-01 04:33:05.039689', 'step': 11864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:05.092231', 'step': 11864, 'epoch': 2} {'type': 'loss', 'content': 0.047039441764354706, 'timestamp': '2025-10-01 04:33:05.094334', 'step': 11865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:05.147313', 'step': 11865, 'epoch': 2} {'type': 'loss', 'content': 0.13697397708892822, 'timestamp': '2025-10-01 04:33:05.149579', 'step': 11866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:05.202860', 'step': 11866, 'epoch': 2} {'type': 'loss', 'content': 0.07913721352815628, 'timestamp': '2025-10-01 04:33:05.205225', 'step': 11867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:05.258298', 'step': 11867, 'epoch': 2} {'type': 'loss', 'content': 0.12900985777378082, 'timestamp': '2025-10-01 04:33:05.264113', 'step': 11868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:05.316613', 'step': 11868, 'epoch': 2} {'type': 'loss', 'content': 0.09101726859807968, 'timestamp': '2025-10-01 04:33:05.318695', 'step': 11869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:05.371301', 'step': 11869, 'epoch': 2} {'type': 'loss', 'content': 0.09579159319400787, 'timestamp': '2025-10-01 04:33:05.373405', 'step': 11870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:33:05.427026', 'step': 11870, 'epoch': 2} {'type': 'loss', 'content': 0.16158753633499146, 'timestamp': '2025-10-01 04:33:05.429283', 'step': 11871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:33:05.482417', 'step': 11871, 'epoch': 2} {'type': 'loss', 'content': 0.09738370776176453, 'timestamp': '2025-10-01 04:33:05.488451', 'step': 11872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:33:05.545986', 'step': 11872, 'epoch': 2} {'type': 'loss', 'content': 0.14672763645648956, 'timestamp': '2025-10-01 04:33:05.548076', 'step': 11873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:05.601232', 'step': 11873, 'epoch': 2} {'type': 'loss', 'content': 0.1674177050590515, 'timestamp': '2025-10-01 04:33:05.603357', 'step': 11874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:05.656689', 'step': 11874, 'epoch': 2} {'type': 'loss', 'content': 0.04727448895573616, 'timestamp': '2025-10-01 04:33:05.658842', 'step': 11875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:05.725706', 'step': 11875, 'epoch': 2} {'type': 'loss', 'content': 0.10373321175575256, 'timestamp': '2025-10-01 04:33:05.731589', 'step': 11876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:05.784205', 'step': 11876, 'epoch': 2} {'type': 'loss', 'content': 0.08134228736162186, 'timestamp': '2025-10-01 04:33:05.786448', 'step': 11877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:05.839106', 'step': 11877, 'epoch': 2} {'type': 'loss', 'content': 0.13358131051063538, 'timestamp': '2025-10-01 04:33:05.841350', 'step': 11878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:05.894520', 'step': 11878, 'epoch': 2} {'type': 'loss', 'content': 0.10293979197740555, 'timestamp': '2025-10-01 04:33:05.896609', 'step': 11879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:05.949804', 'step': 11879, 'epoch': 2} {'type': 'loss', 'content': 0.1641482412815094, 'timestamp': '2025-10-01 04:33:05.955585', 'step': 11880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:06.008094', 'step': 11880, 'epoch': 2} {'type': 'loss', 'content': 0.10849341750144958, 'timestamp': '2025-10-01 04:33:06.010196', 'step': 11881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:33:06.063447', 'step': 11881, 'epoch': 2} {'type': 'loss', 'content': 0.14827871322631836, 'timestamp': '2025-10-01 04:33:06.066403', 'step': 11882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:06.119382', 'step': 11882, 'epoch': 2} {'type': 'loss', 'content': 0.170166477560997, 'timestamp': '2025-10-01 04:33:06.121370', 'step': 11883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:06.174390', 'step': 11883, 'epoch': 2} {'type': 'loss', 'content': 0.13674576580524445, 'timestamp': '2025-10-01 04:33:06.180258', 'step': 11884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:06.232756', 'step': 11884, 'epoch': 2} {'type': 'loss', 'content': 0.15218786895275116, 'timestamp': '2025-10-01 04:33:06.234897', 'step': 11885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:06.289069', 'step': 11885, 'epoch': 2} {'type': 'loss', 'content': 0.18395236134529114, 'timestamp': '2025-10-01 04:33:06.291158', 'step': 11886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:06.344240', 'step': 11886, 'epoch': 2} {'type': 'loss', 'content': 0.15266689658164978, 'timestamp': '2025-10-01 04:33:06.346356', 'step': 11887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:06.399633', 'step': 11887, 'epoch': 2} {'type': 'loss', 'content': 0.069794662296772, 'timestamp': '2025-10-01 04:33:06.406778', 'step': 11888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:06.462285', 'step': 11888, 'epoch': 2} {'type': 'loss', 'content': 0.16307422518730164, 'timestamp': '2025-10-01 04:33:06.464517', 'step': 11889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:06.517442', 'step': 11889, 'epoch': 2} {'type': 'loss', 'content': 0.11380645632743835, 'timestamp': '2025-10-01 04:33:06.519553', 'step': 11890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:06.572972', 'step': 11890, 'epoch': 2} {'type': 'loss', 'content': 0.1320362091064453, 'timestamp': '2025-10-01 04:33:06.575114', 'step': 11891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:06.628295', 'step': 11891, 'epoch': 2} {'type': 'loss', 'content': 0.19549013674259186, 'timestamp': '2025-10-01 04:33:06.645658', 'step': 11892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:06.700927', 'step': 11892, 'epoch': 2} {'type': 'loss', 'content': 0.13295160233974457, 'timestamp': '2025-10-01 04:33:06.708073', 'step': 11893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:06.762687', 'step': 11893, 'epoch': 2} {'type': 'loss', 'content': 0.13359574973583221, 'timestamp': '2025-10-01 04:33:06.765497', 'step': 11894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:06.819378', 'step': 11894, 'epoch': 2} {'type': 'loss', 'content': 0.23350541293621063, 'timestamp': '2025-10-01 04:33:06.822303', 'step': 11895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:06.878056', 'step': 11895, 'epoch': 2} {'type': 'loss', 'content': 0.055572379380464554, 'timestamp': '2025-10-01 04:33:06.883460', 'step': 11896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:06.937102', 'step': 11896, 'epoch': 2} {'type': 'loss', 'content': 0.17654912173748016, 'timestamp': '2025-10-01 04:33:06.952894', 'step': 11897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:07.009384', 'step': 11897, 'epoch': 2} {'type': 'loss', 'content': 0.08859600871801376, 'timestamp': '2025-10-01 04:33:07.014391', 'step': 11898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:07.073750', 'step': 11898, 'epoch': 2} {'type': 'loss', 'content': 0.1244230717420578, 'timestamp': '2025-10-01 04:33:07.076461', 'step': 11899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:07.132255', 'step': 11899, 'epoch': 2} {'type': 'loss', 'content': 0.11808070540428162, 'timestamp': '2025-10-01 04:33:07.139232', 'step': 11900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:07.194786', 'step': 11900, 'epoch': 2} {'type': 'loss', 'content': 0.2629462778568268, 'timestamp': '2025-10-01 04:33:07.196902', 'step': 11901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:07.272906', 'step': 11901, 'epoch': 2} {'type': 'loss', 'content': 0.12578575313091278, 'timestamp': '2025-10-01 04:33:07.277745', 'step': 11902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:07.342013', 'step': 11902, 'epoch': 2} {'type': 'loss', 'content': 0.13309194147586823, 'timestamp': '2025-10-01 04:33:07.344451', 'step': 11903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:07.400647', 'step': 11903, 'epoch': 2} {'type': 'loss', 'content': 0.20031535625457764, 'timestamp': '2025-10-01 04:33:07.408028', 'step': 11904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:07.468068', 'step': 11904, 'epoch': 2} {'type': 'loss', 'content': 0.13169507682323456, 'timestamp': '2025-10-01 04:33:07.482443', 'step': 11905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:07.541761', 'step': 11905, 'epoch': 2} {'type': 'loss', 'content': 0.10367832332849503, 'timestamp': '2025-10-01 04:33:07.545049', 'step': 11906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:07.616845', 'step': 11906, 'epoch': 2} {'type': 'loss', 'content': 0.06875807791948318, 'timestamp': '2025-10-01 04:33:07.623340', 'step': 11907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:07.678897', 'step': 11907, 'epoch': 2} {'type': 'loss', 'content': 0.12839797139167786, 'timestamp': '2025-10-01 04:33:07.689776', 'step': 11908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:07.749091', 'step': 11908, 'epoch': 2} {'type': 'loss', 'content': 0.07003943622112274, 'timestamp': '2025-10-01 04:33:07.754547', 'step': 11909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:33:07.814064', 'step': 11909, 'epoch': 2} {'type': 'loss', 'content': 0.22376587986946106, 'timestamp': '2025-10-01 04:33:07.822556', 'step': 11910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:07.886615', 'step': 11910, 'epoch': 2} {'type': 'loss', 'content': 0.16008847951889038, 'timestamp': '2025-10-01 04:33:07.896936', 'step': 11911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:33:07.974012', 'step': 11911, 'epoch': 2} {'type': 'loss', 'content': 0.12669938802719116, 'timestamp': '2025-10-01 04:33:07.979781', 'step': 11912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:08.033175', 'step': 11912, 'epoch': 2} {'type': 'loss', 'content': 0.10513844341039658, 'timestamp': '2025-10-01 04:33:08.035352', 'step': 11913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:08.090004', 'step': 11913, 'epoch': 2} {'type': 'loss', 'content': 0.12735633552074432, 'timestamp': '2025-10-01 04:33:08.092204', 'step': 11914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:08.146260', 'step': 11914, 'epoch': 2} {'type': 'loss', 'content': 0.23967009782791138, 'timestamp': '2025-10-01 04:33:08.148361', 'step': 11915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:08.202341', 'step': 11915, 'epoch': 2} {'type': 'loss', 'content': 0.08701921254396439, 'timestamp': '2025-10-01 04:33:08.208452', 'step': 11916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:08.261592', 'step': 11916, 'epoch': 2} {'type': 'loss', 'content': 0.060143567621707916, 'timestamp': '2025-10-01 04:33:08.263715', 'step': 11917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:08.317940', 'step': 11917, 'epoch': 2} {'type': 'loss', 'content': 0.12572522461414337, 'timestamp': '2025-10-01 04:33:08.320056', 'step': 11918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:08.374036', 'step': 11918, 'epoch': 2} {'type': 'loss', 'content': 0.10456592589616776, 'timestamp': '2025-10-01 04:33:08.377189', 'step': 11919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:08.438368', 'step': 11919, 'epoch': 2} {'type': 'loss', 'content': 0.16495470702648163, 'timestamp': '2025-10-01 04:33:08.446144', 'step': 11920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:08.502220', 'step': 11920, 'epoch': 2} {'type': 'loss', 'content': 0.13939514756202698, 'timestamp': '2025-10-01 04:33:08.505785', 'step': 11921, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:08.560625', 'step': 11921, 'epoch': 2} {'type': 'loss', 'content': 0.06990301609039307, 'timestamp': '2025-10-01 04:33:08.562645', 'step': 11922, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:08.615486', 'step': 11922, 'epoch': 2} {'type': 'loss', 'content': 0.15742947161197662, 'timestamp': '2025-10-01 04:33:08.617497', 'step': 11923, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:08.670266', 'step': 11923, 'epoch': 2} {'type': 'loss', 'content': 0.18145398795604706, 'timestamp': '2025-10-01 04:33:08.676177', 'step': 11924, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:08.741870', 'step': 11924, 'epoch': 2} {'type': 'loss', 'content': 0.07607243210077286, 'timestamp': '2025-10-01 04:33:08.744170', 'step': 11925, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:08.797404', 'step': 11925, 'epoch': 2} {'type': 'loss', 'content': 0.15637996792793274, 'timestamp': '2025-10-01 04:33:08.799570', 'step': 11926, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:08.852934', 'step': 11926, 'epoch': 2} {'type': 'loss', 'content': 0.12322290241718292, 'timestamp': '2025-10-01 04:33:08.854936', 'step': 11927, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-01 04:33:08.910448', 'step': 11927, 'epoch': 2} {'type': 'loss', 'content': 0.11627168953418732, 'timestamp': '2025-10-01 04:33:08.916165', 'step': 11928, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:08.969610', 'step': 11928, 'epoch': 2} {'type': 'loss', 'content': 0.05008124187588692, 'timestamp': '2025-10-01 04:33:08.971795', 'step': 11929, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:09.028805', 'step': 11929, 'epoch': 2} {'type': 'loss', 'content': 0.1093968078494072, 'timestamp': '2025-10-01 04:33:09.031121', 'step': 11930, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:09.093973', 'step': 11930, 'epoch': 2} {'type': 'loss', 'content': 0.16261741518974304, 'timestamp': '2025-10-01 04:33:09.096047', 'step': 11931, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:09.149106', 'step': 11931, 'epoch': 2} {'type': 'loss', 'content': 0.08907236158847809, 'timestamp': '2025-10-01 04:33:09.155000', 'step': 11932, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:09.207192', 'step': 11932, 'epoch': 2} {'type': 'loss', 'content': 0.18925178050994873, 'timestamp': '2025-10-01 04:33:09.216207', 'step': 11933, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:33:09.269193', 'step': 11933, 'epoch': 2} {'type': 'loss', 'content': 0.06378946453332901, 'timestamp': '2025-10-01 04:33:09.270875', 'step': 11934, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:09.325011', 'step': 11934, 'epoch': 2} {'type': 'loss', 'content': 0.14817385375499725, 'timestamp': '2025-10-01 04:33:09.327008', 'step': 11935, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:09.381303', 'step': 11935, 'epoch': 2} {'type': 'loss', 'content': 0.12538574635982513, 'timestamp': '2025-10-01 04:33:09.387762', 'step': 11936, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:09.442286', 'step': 11936, 'epoch': 2} {'type': 'loss', 'content': 0.11599718779325485, 'timestamp': '2025-10-01 04:33:09.444655', 'step': 11937, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:09.499194', 'step': 11937, 'epoch': 2} {'type': 'loss', 'content': 0.08400868624448776, 'timestamp': '2025-10-01 04:33:09.501445', 'step': 11938, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:09.555178', 'step': 11938, 'epoch': 2} {'type': 'loss', 'content': 0.05957767739892006, 'timestamp': '2025-10-01 04:33:09.557477', 'step': 11939, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:09.610474', 'step': 11939, 'epoch': 2} {'type': 'loss', 'content': 0.159056156873703, 'timestamp': '2025-10-01 04:33:09.616315', 'step': 11940, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:09.669575', 'step': 11940, 'epoch': 2} {'type': 'loss', 'content': 0.12685026228427887, 'timestamp': '2025-10-01 04:33:09.671745', 'step': 11941, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:09.725612', 'step': 11941, 'epoch': 2} {'type': 'loss', 'content': 0.09486481547355652, 'timestamp': '2025-10-01 04:33:09.727602', 'step': 11942, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:09.782012', 'step': 11942, 'epoch': 2} {'type': 'loss', 'content': 0.08800173550844193, 'timestamp': '2025-10-01 04:33:09.784183', 'step': 11943, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:09.842168', 'step': 11943, 'epoch': 2} {'type': 'loss', 'content': 0.12241581082344055, 'timestamp': '2025-10-01 04:33:09.848795', 'step': 11944, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:09.906209', 'step': 11944, 'epoch': 2} {'type': 'loss', 'content': 0.046490803360939026, 'timestamp': '2025-10-01 04:33:09.908491', 'step': 11945, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:09.964734', 'step': 11945, 'epoch': 2} {'type': 'loss', 'content': 0.16711296141147614, 'timestamp': '2025-10-01 04:33:09.966899', 'step': 11946, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:10.024640', 'step': 11946, 'epoch': 2} {'type': 'loss', 'content': 0.16829295456409454, 'timestamp': '2025-10-01 04:33:10.027793', 'step': 11947, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:10.085228', 'step': 11947, 'epoch': 2} {'type': 'loss', 'content': 0.07271213829517365, 'timestamp': '2025-10-01 04:33:10.091762', 'step': 11948, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:10.149361', 'step': 11948, 'epoch': 2} {'type': 'loss', 'content': 0.10227394104003906, 'timestamp': '2025-10-01 04:33:10.151595', 'step': 11949, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:33:10.210057', 'step': 11949, 'epoch': 2} {'type': 'loss', 'content': 0.09711417555809021, 'timestamp': '2025-10-01 04:33:10.212495', 'step': 11950, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:10.269824', 'step': 11950, 'epoch': 2} {'type': 'loss', 'content': 0.12249888479709625, 'timestamp': '2025-10-01 04:33:10.272277', 'step': 11951, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:10.329775', 'step': 11951, 'epoch': 2} {'type': 'loss', 'content': 0.07677987962961197, 'timestamp': '2025-10-01 04:33:10.336238', 'step': 11952, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:10.397828', 'step': 11952, 'epoch': 2} {'type': 'loss', 'content': 0.12332086265087128, 'timestamp': '2025-10-01 04:33:10.400161', 'step': 11953, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:10.456251', 'step': 11953, 'epoch': 2} {'type': 'loss', 'content': 0.05940616875886917, 'timestamp': '2025-10-01 04:33:10.458389', 'step': 11954, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:10.516390', 'step': 11954, 'epoch': 2} {'type': 'loss', 'content': 0.08679278939962387, 'timestamp': '2025-10-01 04:33:10.518481', 'step': 11955, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:10.583003', 'step': 11955, 'epoch': 2} {'type': 'loss', 'content': 0.12453167140483856, 'timestamp': '2025-10-01 04:33:10.589491', 'step': 11956, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:10.646260', 'step': 11956, 'epoch': 2} {'type': 'loss', 'content': 0.22268520295619965, 'timestamp': '2025-10-01 04:33:10.648413', 'step': 11957, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:10.705047', 'step': 11957, 'epoch': 2} {'type': 'loss', 'content': 0.1359754502773285, 'timestamp': '2025-10-01 04:33:10.707165', 'step': 11958, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:10.763367', 'step': 11958, 'epoch': 2} {'type': 'loss', 'content': 0.20107227563858032, 'timestamp': '2025-10-01 04:33:10.765459', 'step': 11959, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:10.824339', 'step': 11959, 'epoch': 2} {'type': 'loss', 'content': 0.0810118168592453, 'timestamp': '2025-10-01 04:33:10.831023', 'step': 11960, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:10.887603', 'step': 11960, 'epoch': 2} {'type': 'loss', 'content': 0.139780655503273, 'timestamp': '2025-10-01 04:33:10.889955', 'step': 11961, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:10.946066', 'step': 11961, 'epoch': 2} {'type': 'loss', 'content': 0.07095185667276382, 'timestamp': '2025-10-01 04:33:10.948237', 'step': 11962, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:11.005593', 'step': 11962, 'epoch': 2} {'type': 'loss', 'content': 0.21548792719841003, 'timestamp': '2025-10-01 04:33:11.008702', 'step': 11963, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:11.065917', 'step': 11963, 'epoch': 2} {'type': 'loss', 'content': 0.13438265025615692, 'timestamp': '2025-10-01 04:33:11.072656', 'step': 11964, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:11.129576', 'step': 11964, 'epoch': 2} {'type': 'loss', 'content': 0.1733199954032898, 'timestamp': '2025-10-01 04:33:11.132138', 'step': 11965, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:11.193132', 'step': 11965, 'epoch': 2} {'type': 'loss', 'content': 0.08292389661073685, 'timestamp': '2025-10-01 04:33:11.195002', 'step': 11966, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:11.248719', 'step': 11966, 'epoch': 2} {'type': 'loss', 'content': 0.21189741790294647, 'timestamp': '2025-10-01 04:33:11.250956', 'step': 11967, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:11.304056', 'step': 11967, 'epoch': 2} {'type': 'loss', 'content': 0.13423362374305725, 'timestamp': '2025-10-01 04:33:11.310057', 'step': 11968, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:11.362302', 'step': 11968, 'epoch': 2} {'type': 'loss', 'content': 0.1418595016002655, 'timestamp': '2025-10-01 04:33:11.364521', 'step': 11969, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:11.426687', 'step': 11969, 'epoch': 2} {'type': 'loss', 'content': 0.17627261579036713, 'timestamp': '2025-10-01 04:33:11.429389', 'step': 11970, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:11.484492', 'step': 11970, 'epoch': 2} {'type': 'loss', 'content': 0.11936940997838974, 'timestamp': '2025-10-01 04:33:11.486492', 'step': 11971, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:33:11.538994', 'step': 11971, 'epoch': 2} {'type': 'loss', 'content': 0.13494791090488434, 'timestamp': '2025-10-01 04:33:11.544734', 'step': 11972, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:11.597152', 'step': 11972, 'epoch': 2} {'type': 'loss', 'content': 0.1546822339296341, 'timestamp': '2025-10-01 04:33:11.599268', 'step': 11973, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:11.651993', 'step': 11973, 'epoch': 2} {'type': 'loss', 'content': 0.1867765188217163, 'timestamp': '2025-10-01 04:33:11.653842', 'step': 11974, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:11.707106', 'step': 11974, 'epoch': 2} {'type': 'loss', 'content': 0.12405690550804138, 'timestamp': '2025-10-01 04:33:11.711602', 'step': 11975, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:11.766056', 'step': 11975, 'epoch': 2} {'type': 'loss', 'content': 0.10416285693645477, 'timestamp': '2025-10-01 04:33:11.771574', 'step': 11976, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:11.823732', 'step': 11976, 'epoch': 2} {'type': 'loss', 'content': 0.10542100667953491, 'timestamp': '2025-10-01 04:33:11.825662', 'step': 11977, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:11.878773', 'step': 11977, 'epoch': 2} {'type': 'loss', 'content': 0.13334640860557556, 'timestamp': '2025-10-01 04:33:11.880801', 'step': 11978, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:11.934174', 'step': 11978, 'epoch': 2} {'type': 'loss', 'content': 0.1303602159023285, 'timestamp': '2025-10-01 04:33:11.936460', 'step': 11979, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:11.989035', 'step': 11979, 'epoch': 2} {'type': 'loss', 'content': 0.13783730566501617, 'timestamp': '2025-10-01 04:33:11.994879', 'step': 11980, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:12.046979', 'step': 11980, 'epoch': 2} {'type': 'loss', 'content': 0.12203624099493027, 'timestamp': '2025-10-01 04:33:12.048855', 'step': 11981, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:12.101466', 'step': 11981, 'epoch': 2} {'type': 'loss', 'content': 0.1623411774635315, 'timestamp': '2025-10-01 04:33:12.103541', 'step': 11982, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:12.156114', 'step': 11982, 'epoch': 2} {'type': 'loss', 'content': 0.10497238487005234, 'timestamp': '2025-10-01 04:33:12.158602', 'step': 11983, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:12.212459', 'step': 11983, 'epoch': 2} {'type': 'loss', 'content': 0.08859501779079437, 'timestamp': '2025-10-01 04:33:12.218131', 'step': 11984, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:33:12.269976', 'step': 11984, 'epoch': 2} {'type': 'loss', 'content': 0.08230727165937424, 'timestamp': '2025-10-01 04:33:12.276921', 'step': 11985, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:12.330720', 'step': 11985, 'epoch': 2} {'type': 'loss', 'content': 0.14685048162937164, 'timestamp': '2025-10-01 04:33:12.333163', 'step': 11986, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:12.386716', 'step': 11986, 'epoch': 2} {'type': 'loss', 'content': 0.12588481605052948, 'timestamp': '2025-10-01 04:33:12.389555', 'step': 11987, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:33:12.443749', 'step': 11987, 'epoch': 2} {'type': 'loss', 'content': 0.07364470511674881, 'timestamp': '2025-10-01 04:33:12.449723', 'step': 11988, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:12.502829', 'step': 11988, 'epoch': 2} {'type': 'loss', 'content': 0.13304239511489868, 'timestamp': '2025-10-01 04:33:12.505459', 'step': 11989, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:12.559680', 'step': 11989, 'epoch': 2} {'type': 'loss', 'content': 0.0973365381360054, 'timestamp': '2025-10-01 04:33:12.564025', 'step': 11990, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:12.618824', 'step': 11990, 'epoch': 2} {'type': 'loss', 'content': 0.11357543617486954, 'timestamp': '2025-10-01 04:33:12.620776', 'step': 11991, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:33:12.678598', 'step': 11991, 'epoch': 2} {'type': 'loss', 'content': 0.12659631669521332, 'timestamp': '2025-10-01 04:33:12.685074', 'step': 11992, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:12.739738', 'step': 11992, 'epoch': 2} {'type': 'loss', 'content': 0.058104339987039566, 'timestamp': '2025-10-01 04:33:12.741995', 'step': 11993, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:12.796540', 'step': 11993, 'epoch': 2} {'type': 'loss', 'content': 0.27279651165008545, 'timestamp': '2025-10-01 04:33:12.798814', 'step': 11994, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:12.853475', 'step': 11994, 'epoch': 2} {'type': 'loss', 'content': 0.1069554015994072, 'timestamp': '2025-10-01 04:33:12.856082', 'step': 11995, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:12.910179', 'step': 11995, 'epoch': 2} {'type': 'loss', 'content': 0.14275316894054413, 'timestamp': '2025-10-01 04:33:12.915895', 'step': 11996, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:12.969718', 'step': 11996, 'epoch': 2} {'type': 'loss', 'content': 0.10109100490808487, 'timestamp': '2025-10-01 04:33:12.975794', 'step': 11997, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:13.040249', 'step': 11997, 'epoch': 2} {'type': 'loss', 'content': 0.1801343709230423, 'timestamp': '2025-10-01 04:33:13.042888', 'step': 11998, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:13.097387', 'step': 11998, 'epoch': 2} {'type': 'loss', 'content': 0.07635580003261566, 'timestamp': '2025-10-01 04:33:13.099780', 'step': 11999, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:13.161647', 'step': 11999, 'epoch': 2} {'type': 'loss', 'content': 0.13007962703704834, 'timestamp': '2025-10-01 04:33:13.167409', 'step': 12000, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 12000', 'timestamp': '2025-10-01 04:33:13.534051', 'step': 12000, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:13.587017', 'step': 12000, 'epoch': 2} {'type': 'loss', 'content': 0.16195707023143768, 'timestamp': '2025-10-01 04:33:13.589694', 'step': 12001, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:33:13.650712', 'step': 12001, 'epoch': 2} {'type': 'loss', 'content': 0.0791044533252716, 'timestamp': '2025-10-01 04:33:13.653799', 'step': 12002, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:13.715614', 'step': 12002, 'epoch': 2} {'type': 'loss', 'content': 0.21229979395866394, 'timestamp': '2025-10-01 04:33:13.724582', 'step': 12003, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:13.779341', 'step': 12003, 'epoch': 2} {'type': 'loss', 'content': 0.04129844531416893, 'timestamp': '2025-10-01 04:33:13.791818', 'step': 12004, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:13.844793', 'step': 12004, 'epoch': 2} {'type': 'loss', 'content': 0.2031601518392563, 'timestamp': '2025-10-01 04:33:13.846654', 'step': 12005, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:13.899479', 'step': 12005, 'epoch': 2} {'type': 'loss', 'content': 0.06937339156866074, 'timestamp': '2025-10-01 04:33:13.901753', 'step': 12006, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:13.955525', 'step': 12006, 'epoch': 2} {'type': 'loss', 'content': 0.093688003718853, 'timestamp': '2025-10-01 04:33:13.957814', 'step': 12007, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:14.018664', 'step': 12007, 'epoch': 2} {'type': 'loss', 'content': 0.1471124142408371, 'timestamp': '2025-10-01 04:33:14.024083', 'step': 12008, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:14.076143', 'step': 12008, 'epoch': 2} {'type': 'loss', 'content': 0.0990217849612236, 'timestamp': '2025-10-01 04:33:14.078206', 'step': 12009, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:14.134136', 'step': 12009, 'epoch': 2} {'type': 'loss', 'content': 0.16116589307785034, 'timestamp': '2025-10-01 04:33:14.137948', 'step': 12010, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:14.191654', 'step': 12010, 'epoch': 2} {'type': 'loss', 'content': 0.041376180946826935, 'timestamp': '2025-10-01 04:33:14.193791', 'step': 12011, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:14.248088', 'step': 12011, 'epoch': 2} {'type': 'loss', 'content': 0.09117167443037033, 'timestamp': '2025-10-01 04:33:14.253597', 'step': 12012, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:14.312298', 'step': 12012, 'epoch': 2} {'type': 'loss', 'content': 0.10870897769927979, 'timestamp': '2025-10-01 04:33:14.314560', 'step': 12013, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:14.368222', 'step': 12013, 'epoch': 2} {'type': 'loss', 'content': 0.11651057749986649, 'timestamp': '2025-10-01 04:33:14.374793', 'step': 12014, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:14.436126', 'step': 12014, 'epoch': 2} {'type': 'loss', 'content': 0.13533905148506165, 'timestamp': '2025-10-01 04:33:14.443115', 'step': 12015, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:33:14.502114', 'step': 12015, 'epoch': 2} {'type': 'loss', 'content': 0.07422684878110886, 'timestamp': '2025-10-01 04:33:14.508997', 'step': 12016, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:14.564374', 'step': 12016, 'epoch': 2} {'type': 'loss', 'content': 0.11826741695404053, 'timestamp': '2025-10-01 04:33:14.566393', 'step': 12017, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:14.619330', 'step': 12017, 'epoch': 2} {'type': 'loss', 'content': 0.08838707208633423, 'timestamp': '2025-10-01 04:33:14.621267', 'step': 12018, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:14.674106', 'step': 12018, 'epoch': 2} {'type': 'loss', 'content': 0.064157634973526, 'timestamp': '2025-10-01 04:33:14.676336', 'step': 12019, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:14.729442', 'step': 12019, 'epoch': 2} {'type': 'loss', 'content': 0.0958976075053215, 'timestamp': '2025-10-01 04:33:14.735287', 'step': 12020, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:14.788077', 'step': 12020, 'epoch': 2} {'type': 'loss', 'content': 0.10463166981935501, 'timestamp': '2025-10-01 04:33:14.790114', 'step': 12021, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:14.842810', 'step': 12021, 'epoch': 2} {'type': 'loss', 'content': 0.10891945660114288, 'timestamp': '2025-10-01 04:33:14.844841', 'step': 12022, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:14.899259', 'step': 12022, 'epoch': 2} {'type': 'loss', 'content': 0.0997716411948204, 'timestamp': '2025-10-01 04:33:14.901523', 'step': 12023, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:14.955525', 'step': 12023, 'epoch': 2} {'type': 'loss', 'content': 0.1530551314353943, 'timestamp': '2025-10-01 04:33:14.961997', 'step': 12024, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:15.014793', 'step': 12024, 'epoch': 2} {'type': 'loss', 'content': 0.0942172184586525, 'timestamp': '2025-10-01 04:33:15.017333', 'step': 12025, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:15.070702', 'step': 12025, 'epoch': 2} {'type': 'loss', 'content': 0.06885899603366852, 'timestamp': '2025-10-01 04:33:15.072911', 'step': 12026, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:15.126144', 'step': 12026, 'epoch': 2} {'type': 'loss', 'content': 0.15242472290992737, 'timestamp': '2025-10-01 04:33:15.128183', 'step': 12027, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:15.181561', 'step': 12027, 'epoch': 2} {'type': 'loss', 'content': 0.057593099772930145, 'timestamp': '2025-10-01 04:33:15.187375', 'step': 12028, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:15.239882', 'step': 12028, 'epoch': 2} {'type': 'loss', 'content': 0.16810014843940735, 'timestamp': '2025-10-01 04:33:15.242513', 'step': 12029, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:15.303988', 'step': 12029, 'epoch': 2} {'type': 'loss', 'content': 0.19067075848579407, 'timestamp': '2025-10-01 04:33:15.306024', 'step': 12030, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:15.359163', 'step': 12030, 'epoch': 2} {'type': 'loss', 'content': 0.09195323288440704, 'timestamp': '2025-10-01 04:33:15.361812', 'step': 12031, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:15.415113', 'step': 12031, 'epoch': 2} {'type': 'loss', 'content': 0.12145453691482544, 'timestamp': '2025-10-01 04:33:15.420805', 'step': 12032, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:15.477541', 'step': 12032, 'epoch': 2} {'type': 'loss', 'content': 0.16343316435813904, 'timestamp': '2025-10-01 04:33:15.479570', 'step': 12033, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:15.531983', 'step': 12033, 'epoch': 2} {'type': 'loss', 'content': 0.0562111921608448, 'timestamp': '2025-10-01 04:33:15.533864', 'step': 12034, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:15.587182', 'step': 12034, 'epoch': 2} {'type': 'loss', 'content': 0.1175985112786293, 'timestamp': '2025-10-01 04:33:15.589266', 'step': 12035, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:15.642272', 'step': 12035, 'epoch': 2} {'type': 'loss', 'content': 0.091082364320755, 'timestamp': '2025-10-01 04:33:15.647859', 'step': 12036, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:33:15.700539', 'step': 12036, 'epoch': 2} {'type': 'loss', 'content': 0.16330452263355255, 'timestamp': '2025-10-01 04:33:15.702661', 'step': 12037, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:15.756792', 'step': 12037, 'epoch': 2} {'type': 'loss', 'content': 0.11190454661846161, 'timestamp': '2025-10-01 04:33:15.760798', 'step': 12038, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:15.814878', 'step': 12038, 'epoch': 2} {'type': 'loss', 'content': 0.03970189765095711, 'timestamp': '2025-10-01 04:33:15.816987', 'step': 12039, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:15.887991', 'step': 12039, 'epoch': 2} {'type': 'loss', 'content': 0.06514852494001389, 'timestamp': '2025-10-01 04:33:15.893316', 'step': 12040, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:15.945914', 'step': 12040, 'epoch': 2} {'type': 'loss', 'content': 0.08312386274337769, 'timestamp': '2025-10-01 04:33:15.948012', 'step': 12041, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:16.000871', 'step': 12041, 'epoch': 2} {'type': 'loss', 'content': 0.12022537738084793, 'timestamp': '2025-10-01 04:33:16.003105', 'step': 12042, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:16.056359', 'step': 12042, 'epoch': 2} {'type': 'loss', 'content': 0.19305910170078278, 'timestamp': '2025-10-01 04:33:16.058450', 'step': 12043, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:16.111157', 'step': 12043, 'epoch': 2} {'type': 'loss', 'content': 0.08291184157133102, 'timestamp': '2025-10-01 04:33:16.116606', 'step': 12044, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:16.168976', 'step': 12044, 'epoch': 2} {'type': 'loss', 'content': 0.15747018158435822, 'timestamp': '2025-10-01 04:33:16.171088', 'step': 12045, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:16.223690', 'step': 12045, 'epoch': 2} {'type': 'loss', 'content': 0.07910895347595215, 'timestamp': '2025-10-01 04:33:16.225595', 'step': 12046, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:16.278211', 'step': 12046, 'epoch': 2} {'type': 'loss', 'content': 0.08036620914936066, 'timestamp': '2025-10-01 04:33:16.280919', 'step': 12047, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:16.334098', 'step': 12047, 'epoch': 2} {'type': 'loss', 'content': 0.1671392023563385, 'timestamp': '2025-10-01 04:33:16.339488', 'step': 12048, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:16.391612', 'step': 12048, 'epoch': 2} {'type': 'loss', 'content': 0.0865454375743866, 'timestamp': '2025-10-01 04:33:16.393895', 'step': 12049, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:16.446880', 'step': 12049, 'epoch': 2} {'type': 'loss', 'content': 0.09125003218650818, 'timestamp': '2025-10-01 04:33:16.449078', 'step': 12050, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:16.502041', 'step': 12050, 'epoch': 2} {'type': 'loss', 'content': 0.09687363356351852, 'timestamp': '2025-10-01 04:33:16.503909', 'step': 12051, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:33:16.557678', 'step': 12051, 'epoch': 2} {'type': 'loss', 'content': 0.12859152257442474, 'timestamp': '2025-10-01 04:33:16.563415', 'step': 12052, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:16.616686', 'step': 12052, 'epoch': 2} {'type': 'loss', 'content': 0.11071185022592545, 'timestamp': '2025-10-01 04:33:16.618882', 'step': 12053, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:16.673088', 'step': 12053, 'epoch': 2} {'type': 'loss', 'content': 0.20925581455230713, 'timestamp': '2025-10-01 04:33:16.675533', 'step': 12054, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:33:16.728937', 'step': 12054, 'epoch': 2} {'type': 'loss', 'content': 0.13779322803020477, 'timestamp': '2025-10-01 04:33:16.730828', 'step': 12055, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:16.783734', 'step': 12055, 'epoch': 2} {'type': 'loss', 'content': 0.08552522957324982, 'timestamp': '2025-10-01 04:33:16.789601', 'step': 12056, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:16.854948', 'step': 12056, 'epoch': 2} {'type': 'loss', 'content': 0.14262822270393372, 'timestamp': '2025-10-01 04:33:16.857359', 'step': 12057, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:16.910432', 'step': 12057, 'epoch': 2} {'type': 'loss', 'content': 0.07489147037267685, 'timestamp': '2025-10-01 04:33:16.912906', 'step': 12058, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:16.966168', 'step': 12058, 'epoch': 2} {'type': 'loss', 'content': 0.12677384912967682, 'timestamp': '2025-10-01 04:33:16.968900', 'step': 12059, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:17.022204', 'step': 12059, 'epoch': 2} {'type': 'loss', 'content': 0.14799989759922028, 'timestamp': '2025-10-01 04:33:17.027803', 'step': 12060, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:17.080737', 'step': 12060, 'epoch': 2} {'type': 'loss', 'content': 0.16181783378124237, 'timestamp': '2025-10-01 04:33:17.082784', 'step': 12061, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:17.136096', 'step': 12061, 'epoch': 2} {'type': 'loss', 'content': 0.061206165701150894, 'timestamp': '2025-10-01 04:33:17.138202', 'step': 12062, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:17.191258', 'step': 12062, 'epoch': 2} {'type': 'loss', 'content': 0.1631910800933838, 'timestamp': '2025-10-01 04:33:17.194252', 'step': 12063, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:17.259759', 'step': 12063, 'epoch': 2} {'type': 'loss', 'content': 0.11883603036403656, 'timestamp': '2025-10-01 04:33:17.266625', 'step': 12064, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:17.319454', 'step': 12064, 'epoch': 2} {'type': 'loss', 'content': 0.12874481081962585, 'timestamp': '2025-10-01 04:33:17.321790', 'step': 12065, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:17.376406', 'step': 12065, 'epoch': 2} {'type': 'loss', 'content': 0.11733383685350418, 'timestamp': '2025-10-01 04:33:17.378563', 'step': 12066, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:17.431932', 'step': 12066, 'epoch': 2} {'type': 'loss', 'content': 0.08080022037029266, 'timestamp': '2025-10-01 04:33:17.434672', 'step': 12067, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:17.487868', 'step': 12067, 'epoch': 2} {'type': 'loss', 'content': 0.10623256117105484, 'timestamp': '2025-10-01 04:33:17.493823', 'step': 12068, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:17.555740', 'step': 12068, 'epoch': 2} {'type': 'loss', 'content': 0.10105834901332855, 'timestamp': '2025-10-01 04:33:17.558479', 'step': 12069, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:17.612596', 'step': 12069, 'epoch': 2} {'type': 'loss', 'content': 0.11016128957271576, 'timestamp': '2025-10-01 04:33:17.614755', 'step': 12070, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:17.669112', 'step': 12070, 'epoch': 2} {'type': 'loss', 'content': 0.12564139068126678, 'timestamp': '2025-10-01 04:33:17.671189', 'step': 12071, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:17.725310', 'step': 12071, 'epoch': 2} {'type': 'loss', 'content': 0.10832729190587997, 'timestamp': '2025-10-01 04:33:17.731011', 'step': 12072, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:17.783380', 'step': 12072, 'epoch': 2} {'type': 'loss', 'content': 0.2593975365161896, 'timestamp': '2025-10-01 04:33:17.785445', 'step': 12073, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:17.838255', 'step': 12073, 'epoch': 2} {'type': 'loss', 'content': 0.11240620911121368, 'timestamp': '2025-10-01 04:33:17.840567', 'step': 12074, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:17.893408', 'step': 12074, 'epoch': 2} {'type': 'loss', 'content': 0.09084483981132507, 'timestamp': '2025-10-01 04:33:17.896085', 'step': 12075, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:17.949281', 'step': 12075, 'epoch': 2} {'type': 'loss', 'content': 0.08376210927963257, 'timestamp': '2025-10-01 04:33:17.957852', 'step': 12076, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:18.010989', 'step': 12076, 'epoch': 2} {'type': 'loss', 'content': 0.07218953222036362, 'timestamp': '2025-10-01 04:33:18.012857', 'step': 12077, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:33:18.065874', 'step': 12077, 'epoch': 2} {'type': 'loss', 'content': 0.055893685668706894, 'timestamp': '2025-10-01 04:33:18.073914', 'step': 12078, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:18.127478', 'step': 12078, 'epoch': 2} {'type': 'loss', 'content': 0.11048257350921631, 'timestamp': '2025-10-01 04:33:18.129651', 'step': 12079, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:18.182453', 'step': 12079, 'epoch': 2} {'type': 'loss', 'content': 0.1989106684923172, 'timestamp': '2025-10-01 04:33:18.187836', 'step': 12080, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:18.240358', 'step': 12080, 'epoch': 2} {'type': 'loss', 'content': 0.1299229860305786, 'timestamp': '2025-10-01 04:33:18.242271', 'step': 12081, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:33:18.295827', 'step': 12081, 'epoch': 2} {'type': 'loss', 'content': 0.12228251993656158, 'timestamp': '2025-10-01 04:33:18.297867', 'step': 12082, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:18.351558', 'step': 12082, 'epoch': 2} {'type': 'loss', 'content': 0.05916536599397659, 'timestamp': '2025-10-01 04:33:18.353449', 'step': 12083, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:18.406299', 'step': 12083, 'epoch': 2} {'type': 'loss', 'content': 0.11403768509626389, 'timestamp': '2025-10-01 04:33:18.412027', 'step': 12084, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:18.471040', 'step': 12084, 'epoch': 2} {'type': 'loss', 'content': 0.2828220725059509, 'timestamp': '2025-10-01 04:33:18.472962', 'step': 12085, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:18.525855', 'step': 12085, 'epoch': 2} {'type': 'loss', 'content': 0.15220075845718384, 'timestamp': '2025-10-01 04:33:18.528807', 'step': 12086, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:33:18.583232', 'step': 12086, 'epoch': 2} {'type': 'loss', 'content': 0.14005731046199799, 'timestamp': '2025-10-01 04:33:18.585486', 'step': 12087, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:18.639423', 'step': 12087, 'epoch': 2} {'type': 'loss', 'content': 0.058987729251384735, 'timestamp': '2025-10-01 04:33:18.645055', 'step': 12088, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:18.697882', 'step': 12088, 'epoch': 2} {'type': 'loss', 'content': 0.19211378693580627, 'timestamp': '2025-10-01 04:33:18.700146', 'step': 12089, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:18.753415', 'step': 12089, 'epoch': 2} {'type': 'loss', 'content': 0.12858617305755615, 'timestamp': '2025-10-01 04:33:18.755600', 'step': 12090, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:18.808456', 'step': 12090, 'epoch': 2} {'type': 'loss', 'content': 0.08417674899101257, 'timestamp': '2025-10-01 04:33:18.810804', 'step': 12091, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:18.866821', 'step': 12091, 'epoch': 2} {'type': 'loss', 'content': 0.12711119651794434, 'timestamp': '2025-10-01 04:33:18.872527', 'step': 12092, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:18.925342', 'step': 12092, 'epoch': 2} {'type': 'loss', 'content': 0.13623018562793732, 'timestamp': '2025-10-01 04:33:18.927678', 'step': 12093, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:18.980847', 'step': 12093, 'epoch': 2} {'type': 'loss', 'content': 0.10962381213903427, 'timestamp': '2025-10-01 04:33:18.982841', 'step': 12094, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:19.035870', 'step': 12094, 'epoch': 2} {'type': 'loss', 'content': 0.10476367920637131, 'timestamp': '2025-10-01 04:33:19.038119', 'step': 12095, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:19.091651', 'step': 12095, 'epoch': 2} {'type': 'loss', 'content': 0.08012909442186356, 'timestamp': '2025-10-01 04:33:19.100609', 'step': 12096, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:19.153545', 'step': 12096, 'epoch': 2} {'type': 'loss', 'content': 0.19218888878822327, 'timestamp': '2025-10-01 04:33:19.155228', 'step': 12097, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:19.208562', 'step': 12097, 'epoch': 2} {'type': 'loss', 'content': 0.16044841706752777, 'timestamp': '2025-10-01 04:33:19.210834', 'step': 12098, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:19.267241', 'step': 12098, 'epoch': 2} {'type': 'loss', 'content': 0.21865203976631165, 'timestamp': '2025-10-01 04:33:19.269463', 'step': 12099, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:19.322467', 'step': 12099, 'epoch': 2} {'type': 'loss', 'content': 0.17997342348098755, 'timestamp': '2025-10-01 04:33:19.328059', 'step': 12100, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:19.379967', 'step': 12100, 'epoch': 2} {'type': 'loss', 'content': 0.1291874796152115, 'timestamp': '2025-10-01 04:33:19.382050', 'step': 12101, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:33:19.435291', 'step': 12101, 'epoch': 2} {'type': 'loss', 'content': 0.10229533165693283, 'timestamp': '2025-10-01 04:33:19.437490', 'step': 12102, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:19.490220', 'step': 12102, 'epoch': 2} {'type': 'loss', 'content': 0.056443460285663605, 'timestamp': '2025-10-01 04:33:19.492320', 'step': 12103, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:19.545417', 'step': 12103, 'epoch': 2} {'type': 'loss', 'content': 0.1481199860572815, 'timestamp': '2025-10-01 04:33:19.550858', 'step': 12104, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:19.603397', 'step': 12104, 'epoch': 2} {'type': 'loss', 'content': 0.13111771643161774, 'timestamp': '2025-10-01 04:33:19.605426', 'step': 12105, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:19.658990', 'step': 12105, 'epoch': 2} {'type': 'loss', 'content': 0.10454446822404861, 'timestamp': '2025-10-01 04:33:19.660957', 'step': 12106, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:19.715357', 'step': 12106, 'epoch': 2} {'type': 'loss', 'content': 0.14261595904827118, 'timestamp': '2025-10-01 04:33:19.717604', 'step': 12107, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:19.770641', 'step': 12107, 'epoch': 2} {'type': 'loss', 'content': 0.10325887054204941, 'timestamp': '2025-10-01 04:33:19.776275', 'step': 12108, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:19.833616', 'step': 12108, 'epoch': 2} {'type': 'loss', 'content': 0.08008342236280441, 'timestamp': '2025-10-01 04:33:19.835690', 'step': 12109, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:19.888817', 'step': 12109, 'epoch': 2} {'type': 'loss', 'content': 0.12877467274665833, 'timestamp': '2025-10-01 04:33:19.891665', 'step': 12110, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:19.945442', 'step': 12110, 'epoch': 2} {'type': 'loss', 'content': 0.23518036305904388, 'timestamp': '2025-10-01 04:33:19.947323', 'step': 12111, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:20.000780', 'step': 12111, 'epoch': 2} {'type': 'loss', 'content': 0.06208690628409386, 'timestamp': '2025-10-01 04:33:20.010670', 'step': 12112, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:20.064032', 'step': 12112, 'epoch': 2} {'type': 'loss', 'content': 0.12690593302249908, 'timestamp': '2025-10-01 04:33:20.066113', 'step': 12113, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:20.119826', 'step': 12113, 'epoch': 2} {'type': 'loss', 'content': 0.22145065665245056, 'timestamp': '2025-10-01 04:33:20.121956', 'step': 12114, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:20.176758', 'step': 12114, 'epoch': 2} {'type': 'loss', 'content': 0.12369085103273392, 'timestamp': '2025-10-01 04:33:20.178782', 'step': 12115, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:20.231268', 'step': 12115, 'epoch': 2} {'type': 'loss', 'content': 0.06650373339653015, 'timestamp': '2025-10-01 04:33:20.236947', 'step': 12116, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-01 04:33:33.771628', 'step': 12116, 'epoch': 2} {'type': 'pplx', 'content': 13081.304840285246, 'timestamp': '2025-10-01 04:33:33.781139', 'step': 12116, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:33.841011', 'step': 12116, 'epoch': 2} {'type': 'loss', 'content': 0.10408861190080643, 'timestamp': '2025-10-01 04:33:33.844299', 'step': 12117, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:33.913348', 'step': 12117, 'epoch': 2} {'type': 'loss', 'content': 0.15044976770877838, 'timestamp': '2025-10-01 04:33:33.917255', 'step': 12118, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:33.972638', 'step': 12118, 'epoch': 2} {'type': 'loss', 'content': 0.09021621942520142, 'timestamp': '2025-10-01 04:33:33.976663', 'step': 12119, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:34.048300', 'step': 12119, 'epoch': 2} {'type': 'loss', 'content': 0.1848505735397339, 'timestamp': '2025-10-01 04:33:34.055184', 'step': 12120, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:34.121835', 'step': 12120, 'epoch': 2} {'type': 'loss', 'content': 0.0735587552189827, 'timestamp': '2025-10-01 04:33:34.124602', 'step': 12121, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:34.197756', 'step': 12121, 'epoch': 2} {'type': 'loss', 'content': 0.1427185833454132, 'timestamp': '2025-10-01 04:33:34.199836', 'step': 12122, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:34.254109', 'step': 12122, 'epoch': 2} {'type': 'loss', 'content': 0.1267741471529007, 'timestamp': '2025-10-01 04:33:34.256987', 'step': 12123, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:34.330318', 'step': 12123, 'epoch': 2} {'type': 'loss', 'content': 0.11085399985313416, 'timestamp': '2025-10-01 04:33:34.348830', 'step': 12124, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:34.415387', 'step': 12124, 'epoch': 2} {'type': 'loss', 'content': 0.07520986348390579, 'timestamp': '2025-10-01 04:33:34.419866', 'step': 12125, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:34.527886', 'step': 12125, 'epoch': 2} {'type': 'loss', 'content': 0.1555642932653427, 'timestamp': '2025-10-01 04:33:34.531164', 'step': 12126, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:34.608230', 'step': 12126, 'epoch': 2} {'type': 'loss', 'content': 0.07436712086200714, 'timestamp': '2025-10-01 04:33:34.610936', 'step': 12127, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:34.674031', 'step': 12127, 'epoch': 2} {'type': 'loss', 'content': 0.15718892216682434, 'timestamp': '2025-10-01 04:33:34.684749', 'step': 12128, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:34.746638', 'step': 12128, 'epoch': 2} {'type': 'loss', 'content': 0.1260932981967926, 'timestamp': '2025-10-01 04:33:34.749447', 'step': 12129, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:34.829260', 'step': 12129, 'epoch': 2} {'type': 'loss', 'content': 0.15064162015914917, 'timestamp': '2025-10-01 04:33:34.831552', 'step': 12130, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:34.887870', 'step': 12130, 'epoch': 2} {'type': 'loss', 'content': 0.17518250644207, 'timestamp': '2025-10-01 04:33:34.889771', 'step': 12131, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:34.943559', 'step': 12131, 'epoch': 2} {'type': 'loss', 'content': 0.1952129304409027, 'timestamp': '2025-10-01 04:33:34.949273', 'step': 12132, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:35.002111', 'step': 12132, 'epoch': 2} {'type': 'loss', 'content': 0.1311500519514084, 'timestamp': '2025-10-01 04:33:35.004010', 'step': 12133, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:35.057065', 'step': 12133, 'epoch': 2} {'type': 'loss', 'content': 0.13150687515735626, 'timestamp': '2025-10-01 04:33:35.059167', 'step': 12134, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:35.113638', 'step': 12134, 'epoch': 2} {'type': 'loss', 'content': 0.16574665904045105, 'timestamp': '2025-10-01 04:33:35.115733', 'step': 12135, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:35.169189', 'step': 12135, 'epoch': 2} {'type': 'loss', 'content': 0.1478305160999298, 'timestamp': '2025-10-01 04:33:35.186433', 'step': 12136, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:35.242455', 'step': 12136, 'epoch': 2} {'type': 'loss', 'content': 0.16164140403270721, 'timestamp': '2025-10-01 04:33:35.244523', 'step': 12137, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:35.298387', 'step': 12137, 'epoch': 2} {'type': 'loss', 'content': 0.11603513360023499, 'timestamp': '2025-10-01 04:33:35.301010', 'step': 12138, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:35.369106', 'step': 12138, 'epoch': 2} {'type': 'loss', 'content': 0.17841236293315887, 'timestamp': '2025-10-01 04:33:35.371188', 'step': 12139, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:35.424468', 'step': 12139, 'epoch': 2} {'type': 'loss', 'content': 0.11750001460313797, 'timestamp': '2025-10-01 04:33:35.429912', 'step': 12140, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:35.482968', 'step': 12140, 'epoch': 2} {'type': 'loss', 'content': 0.08955779671669006, 'timestamp': '2025-10-01 04:33:35.485030', 'step': 12141, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:35.538472', 'step': 12141, 'epoch': 2} {'type': 'loss', 'content': 0.15649019181728363, 'timestamp': '2025-10-01 04:33:35.540668', 'step': 12142, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:35.593934', 'step': 12142, 'epoch': 2} {'type': 'loss', 'content': 0.05896258354187012, 'timestamp': '2025-10-01 04:33:35.596133', 'step': 12143, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:35.649707', 'step': 12143, 'epoch': 2} {'type': 'loss', 'content': 0.15525436401367188, 'timestamp': '2025-10-01 04:33:35.655546', 'step': 12144, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:35.708601', 'step': 12144, 'epoch': 2} {'type': 'loss', 'content': 0.10620524734258652, 'timestamp': '2025-10-01 04:33:35.710957', 'step': 12145, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:35.764965', 'step': 12145, 'epoch': 2} {'type': 'loss', 'content': 0.17928382754325867, 'timestamp': '2025-10-01 04:33:35.767050', 'step': 12146, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:35.821010', 'step': 12146, 'epoch': 2} {'type': 'loss', 'content': 0.13101449608802795, 'timestamp': '2025-10-01 04:33:35.823328', 'step': 12147, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:35.877429', 'step': 12147, 'epoch': 2} {'type': 'loss', 'content': 0.08697344362735748, 'timestamp': '2025-10-01 04:33:35.883470', 'step': 12148, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:35.936647', 'step': 12148, 'epoch': 2} {'type': 'loss', 'content': 0.03418458253145218, 'timestamp': '2025-10-01 04:33:35.946587', 'step': 12149, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:36.000333', 'step': 12149, 'epoch': 2} {'type': 'loss', 'content': 0.06694527715444565, 'timestamp': '2025-10-01 04:33:36.005559', 'step': 12150, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:36.060000', 'step': 12150, 'epoch': 2} {'type': 'loss', 'content': 0.1581219583749771, 'timestamp': '2025-10-01 04:33:36.062065', 'step': 12151, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:36.116790', 'step': 12151, 'epoch': 2} {'type': 'loss', 'content': 0.07915008068084717, 'timestamp': '2025-10-01 04:33:36.122424', 'step': 12152, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:36.174892', 'step': 12152, 'epoch': 2} {'type': 'loss', 'content': 0.07859591394662857, 'timestamp': '2025-10-01 04:33:36.176943', 'step': 12153, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:36.229922', 'step': 12153, 'epoch': 2} {'type': 'loss', 'content': 0.140062615275383, 'timestamp': '2025-10-01 04:33:36.232463', 'step': 12154, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:36.285853', 'step': 12154, 'epoch': 2} {'type': 'loss', 'content': 0.09780315309762955, 'timestamp': '2025-10-01 04:33:36.288240', 'step': 12155, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:33:36.341728', 'step': 12155, 'epoch': 2} {'type': 'loss', 'content': 0.10977454483509064, 'timestamp': '2025-10-01 04:33:36.347380', 'step': 12156, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:36.399828', 'step': 12156, 'epoch': 2} {'type': 'loss', 'content': 0.08244995772838593, 'timestamp': '2025-10-01 04:33:36.401972', 'step': 12157, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:33:36.455489', 'step': 12157, 'epoch': 2} {'type': 'loss', 'content': 0.19222047924995422, 'timestamp': '2025-10-01 04:33:36.457472', 'step': 12158, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:36.515090', 'step': 12158, 'epoch': 2} {'type': 'loss', 'content': 0.11020675301551819, 'timestamp': '2025-10-01 04:33:36.517497', 'step': 12159, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:36.583749', 'step': 12159, 'epoch': 2} {'type': 'loss', 'content': 0.27475258708000183, 'timestamp': '2025-10-01 04:33:36.589605', 'step': 12160, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:36.653281', 'step': 12160, 'epoch': 2} {'type': 'loss', 'content': 0.11610502749681473, 'timestamp': '2025-10-01 04:33:36.655504', 'step': 12161, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:36.709002', 'step': 12161, 'epoch': 2} {'type': 'loss', 'content': 0.07335007935762405, 'timestamp': '2025-10-01 04:33:36.711182', 'step': 12162, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:36.764317', 'step': 12162, 'epoch': 2} {'type': 'loss', 'content': 0.07580500841140747, 'timestamp': '2025-10-01 04:33:36.766329', 'step': 12163, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:36.819736', 'step': 12163, 'epoch': 2} {'type': 'loss', 'content': 0.16097582876682281, 'timestamp': '2025-10-01 04:33:36.825305', 'step': 12164, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:36.878089', 'step': 12164, 'epoch': 2} {'type': 'loss', 'content': 0.07713662832975388, 'timestamp': '2025-10-01 04:33:36.880947', 'step': 12165, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:36.940328', 'step': 12165, 'epoch': 2} {'type': 'loss', 'content': 0.09724804759025574, 'timestamp': '2025-10-01 04:33:36.943016', 'step': 12166, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:36.998160', 'step': 12166, 'epoch': 2} {'type': 'loss', 'content': 0.09904593974351883, 'timestamp': '2025-10-01 04:33:37.000462', 'step': 12167, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:37.054403', 'step': 12167, 'epoch': 2} {'type': 'loss', 'content': 0.13163337111473083, 'timestamp': '2025-10-01 04:33:37.059878', 'step': 12168, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:33:37.114202', 'step': 12168, 'epoch': 2} {'type': 'loss', 'content': 0.08020657300949097, 'timestamp': '2025-10-01 04:33:37.116425', 'step': 12169, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:37.170003', 'step': 12169, 'epoch': 2} {'type': 'loss', 'content': 0.08952068537473679, 'timestamp': '2025-10-01 04:33:37.172200', 'step': 12170, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:37.225889', 'step': 12170, 'epoch': 2} {'type': 'loss', 'content': 0.12842342257499695, 'timestamp': '2025-10-01 04:33:37.228059', 'step': 12171, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:37.281235', 'step': 12171, 'epoch': 2} {'type': 'loss', 'content': 0.16570332646369934, 'timestamp': '2025-10-01 04:33:37.300804', 'step': 12172, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:37.353771', 'step': 12172, 'epoch': 2} {'type': 'loss', 'content': 0.18006213009357452, 'timestamp': '2025-10-01 04:33:37.356416', 'step': 12173, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:37.415877', 'step': 12173, 'epoch': 2} {'type': 'loss', 'content': 0.318967342376709, 'timestamp': '2025-10-01 04:33:37.418863', 'step': 12174, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:37.481700', 'step': 12174, 'epoch': 2} {'type': 'loss', 'content': 0.20559564232826233, 'timestamp': '2025-10-01 04:33:37.483827', 'step': 12175, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:37.537634', 'step': 12175, 'epoch': 2} {'type': 'loss', 'content': 0.15740787982940674, 'timestamp': '2025-10-01 04:33:37.543257', 'step': 12176, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:37.596331', 'step': 12176, 'epoch': 2} {'type': 'loss', 'content': 0.15265730023384094, 'timestamp': '2025-10-01 04:33:37.598481', 'step': 12177, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:37.652040', 'step': 12177, 'epoch': 2} {'type': 'loss', 'content': 0.10579324513673782, 'timestamp': '2025-10-01 04:33:37.654380', 'step': 12178, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:37.707912', 'step': 12178, 'epoch': 2} {'type': 'loss', 'content': 0.12792789936065674, 'timestamp': '2025-10-01 04:33:37.710208', 'step': 12179, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:37.764175', 'step': 12179, 'epoch': 2} {'type': 'loss', 'content': 0.05823000892996788, 'timestamp': '2025-10-01 04:33:37.769629', 'step': 12180, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:37.822205', 'step': 12180, 'epoch': 2} {'type': 'loss', 'content': 0.17263270914554596, 'timestamp': '2025-10-01 04:33:37.824222', 'step': 12181, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:37.877183', 'step': 12181, 'epoch': 2} {'type': 'loss', 'content': 0.09816537797451019, 'timestamp': '2025-10-01 04:33:37.879248', 'step': 12182, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:37.942247', 'step': 12182, 'epoch': 2} {'type': 'loss', 'content': 0.07652676850557327, 'timestamp': '2025-10-01 04:33:37.944222', 'step': 12183, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:37.997749', 'step': 12183, 'epoch': 2} {'type': 'loss', 'content': 0.08813399076461792, 'timestamp': '2025-10-01 04:33:38.003495', 'step': 12184, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:38.056696', 'step': 12184, 'epoch': 2} {'type': 'loss', 'content': 0.06442314386367798, 'timestamp': '2025-10-01 04:33:38.058918', 'step': 12185, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:38.112091', 'step': 12185, 'epoch': 2} {'type': 'loss', 'content': 0.25644463300704956, 'timestamp': '2025-10-01 04:33:38.114135', 'step': 12186, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:38.167649', 'step': 12186, 'epoch': 2} {'type': 'loss', 'content': 0.19771231710910797, 'timestamp': '2025-10-01 04:33:38.169903', 'step': 12187, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:33:38.224090', 'step': 12187, 'epoch': 2} {'type': 'loss', 'content': 0.19466839730739594, 'timestamp': '2025-10-01 04:33:38.229985', 'step': 12188, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:38.296990', 'step': 12188, 'epoch': 2} {'type': 'loss', 'content': 0.05133721977472305, 'timestamp': '2025-10-01 04:33:38.298914', 'step': 12189, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:38.351734', 'step': 12189, 'epoch': 2} {'type': 'loss', 'content': 0.08109970390796661, 'timestamp': '2025-10-01 04:33:38.353667', 'step': 12190, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:38.407497', 'step': 12190, 'epoch': 2} {'type': 'loss', 'content': 0.1471666395664215, 'timestamp': '2025-10-01 04:33:38.409494', 'step': 12191, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:38.463006', 'step': 12191, 'epoch': 2} {'type': 'loss', 'content': 0.12765611708164215, 'timestamp': '2025-10-01 04:33:38.468622', 'step': 12192, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:38.520979', 'step': 12192, 'epoch': 2} {'type': 'loss', 'content': 0.13114160299301147, 'timestamp': '2025-10-01 04:33:38.523020', 'step': 12193, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:38.575982', 'step': 12193, 'epoch': 2} {'type': 'loss', 'content': 0.09788119047880173, 'timestamp': '2025-10-01 04:33:38.577846', 'step': 12194, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:33:38.632628', 'step': 12194, 'epoch': 2} {'type': 'loss', 'content': 0.21494276821613312, 'timestamp': '2025-10-01 04:33:38.634775', 'step': 12195, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:38.688401', 'step': 12195, 'epoch': 2} {'type': 'loss', 'content': 0.1168939396739006, 'timestamp': '2025-10-01 04:33:38.702599', 'step': 12196, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:38.757810', 'step': 12196, 'epoch': 2} {'type': 'loss', 'content': 0.08354547619819641, 'timestamp': '2025-10-01 04:33:38.760217', 'step': 12197, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:38.814263', 'step': 12197, 'epoch': 2} {'type': 'loss', 'content': 0.12496645748615265, 'timestamp': '2025-10-01 04:33:38.817032', 'step': 12198, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:38.887209', 'step': 12198, 'epoch': 2} {'type': 'loss', 'content': 0.09245015680789948, 'timestamp': '2025-10-01 04:33:38.889501', 'step': 12199, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:38.943208', 'step': 12199, 'epoch': 2} {'type': 'loss', 'content': 0.09009017795324326, 'timestamp': '2025-10-01 04:33:38.949390', 'step': 12200, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:39.003423', 'step': 12200, 'epoch': 2} {'type': 'loss', 'content': 0.1765235960483551, 'timestamp': '2025-10-01 04:33:39.005553', 'step': 12201, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:39.058789', 'step': 12201, 'epoch': 2} {'type': 'loss', 'content': 0.07298285514116287, 'timestamp': '2025-10-01 04:33:39.060954', 'step': 12202, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:39.136490', 'step': 12202, 'epoch': 2} {'type': 'loss', 'content': 0.10252943634986877, 'timestamp': '2025-10-01 04:33:39.138603', 'step': 12203, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:33:39.192612', 'step': 12203, 'epoch': 2} {'type': 'loss', 'content': 0.048946771770715714, 'timestamp': '2025-10-01 04:33:39.198423', 'step': 12204, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:33:39.252178', 'step': 12204, 'epoch': 2} {'type': 'loss', 'content': 0.07110776752233505, 'timestamp': '2025-10-01 04:33:39.254063', 'step': 12205, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:39.309719', 'step': 12205, 'epoch': 2} {'type': 'loss', 'content': 0.226663738489151, 'timestamp': '2025-10-01 04:33:39.312337', 'step': 12206, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:39.367580', 'step': 12206, 'epoch': 2} {'type': 'loss', 'content': 0.15052567422389984, 'timestamp': '2025-10-01 04:33:39.369556', 'step': 12207, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:39.423757', 'step': 12207, 'epoch': 2} {'type': 'loss', 'content': 0.06977077573537827, 'timestamp': '2025-10-01 04:33:39.429936', 'step': 12208, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:39.484333', 'step': 12208, 'epoch': 2} {'type': 'loss', 'content': 0.17045636475086212, 'timestamp': '2025-10-01 04:33:39.486242', 'step': 12209, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:39.541626', 'step': 12209, 'epoch': 2} {'type': 'loss', 'content': 0.0591777078807354, 'timestamp': '2025-10-01 04:33:39.544977', 'step': 12210, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:39.599247', 'step': 12210, 'epoch': 2} {'type': 'loss', 'content': 0.17320877313613892, 'timestamp': '2025-10-01 04:33:39.601595', 'step': 12211, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:33:39.655437', 'step': 12211, 'epoch': 2} {'type': 'loss', 'content': 0.13250751793384552, 'timestamp': '2025-10-01 04:33:39.661717', 'step': 12212, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:39.715847', 'step': 12212, 'epoch': 2} {'type': 'loss', 'content': 0.19063439965248108, 'timestamp': '2025-10-01 04:33:39.717877', 'step': 12213, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:39.771416', 'step': 12213, 'epoch': 2} {'type': 'loss', 'content': 0.1551201045513153, 'timestamp': '2025-10-01 04:33:39.773488', 'step': 12214, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:39.826434', 'step': 12214, 'epoch': 2} {'type': 'loss', 'content': 0.1560335010290146, 'timestamp': '2025-10-01 04:33:39.828923', 'step': 12215, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:39.891741', 'step': 12215, 'epoch': 2} {'type': 'loss', 'content': 0.14750540256500244, 'timestamp': '2025-10-01 04:33:39.905176', 'step': 12216, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:39.967785', 'step': 12216, 'epoch': 2} {'type': 'loss', 'content': 0.12128232419490814, 'timestamp': '2025-10-01 04:33:39.969785', 'step': 12217, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:40.024507', 'step': 12217, 'epoch': 2} {'type': 'loss', 'content': 0.10459215193986893, 'timestamp': '2025-10-01 04:33:40.026494', 'step': 12218, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:33:40.079490', 'step': 12218, 'epoch': 2} {'type': 'loss', 'content': 0.1288028061389923, 'timestamp': '2025-10-01 04:33:40.081419', 'step': 12219, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:40.143682', 'step': 12219, 'epoch': 2} {'type': 'loss', 'content': 0.0978064090013504, 'timestamp': '2025-10-01 04:33:40.149455', 'step': 12220, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:40.208912', 'step': 12220, 'epoch': 2} {'type': 'loss', 'content': 0.1549108624458313, 'timestamp': '2025-10-01 04:33:40.211274', 'step': 12221, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:40.295290', 'step': 12221, 'epoch': 2} {'type': 'loss', 'content': 0.059625063091516495, 'timestamp': '2025-10-01 04:33:40.297944', 'step': 12222, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:40.358004', 'step': 12222, 'epoch': 2} {'type': 'loss', 'content': 0.04129933938384056, 'timestamp': '2025-10-01 04:33:40.361560', 'step': 12223, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:40.418044', 'step': 12223, 'epoch': 2} {'type': 'loss', 'content': 0.12177357077598572, 'timestamp': '2025-10-01 04:33:40.425740', 'step': 12224, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:40.479390', 'step': 12224, 'epoch': 2} {'type': 'loss', 'content': 0.12603607773780823, 'timestamp': '2025-10-01 04:33:40.481081', 'step': 12225, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:40.535392', 'step': 12225, 'epoch': 2} {'type': 'loss', 'content': 0.27114054560661316, 'timestamp': '2025-10-01 04:33:40.537427', 'step': 12226, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:40.600161', 'step': 12226, 'epoch': 2} {'type': 'loss', 'content': 0.16181401908397675, 'timestamp': '2025-10-01 04:33:40.605017', 'step': 12227, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:40.661337', 'step': 12227, 'epoch': 2} {'type': 'loss', 'content': 0.1680612862110138, 'timestamp': '2025-10-01 04:33:40.667877', 'step': 12228, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:40.722651', 'step': 12228, 'epoch': 2} {'type': 'loss', 'content': 0.11001165211200714, 'timestamp': '2025-10-01 04:33:40.730870', 'step': 12229, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:40.791681', 'step': 12229, 'epoch': 2} {'type': 'loss', 'content': 0.17666123807430267, 'timestamp': '2025-10-01 04:33:40.794368', 'step': 12230, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:40.868278', 'step': 12230, 'epoch': 2} {'type': 'loss', 'content': 0.09206011891365051, 'timestamp': '2025-10-01 04:33:40.875574', 'step': 12231, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:40.937892', 'step': 12231, 'epoch': 2} {'type': 'loss', 'content': 0.06305190920829773, 'timestamp': '2025-10-01 04:33:40.945886', 'step': 12232, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:41.003053', 'step': 12232, 'epoch': 2} {'type': 'loss', 'content': 0.20442397892475128, 'timestamp': '2025-10-01 04:33:41.005112', 'step': 12233, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:41.063046', 'step': 12233, 'epoch': 2} {'type': 'loss', 'content': 0.1684938222169876, 'timestamp': '2025-10-01 04:33:41.065383', 'step': 12234, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:41.122045', 'step': 12234, 'epoch': 2} {'type': 'loss', 'content': 0.15759873390197754, 'timestamp': '2025-10-01 04:33:41.124775', 'step': 12235, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:41.181480', 'step': 12235, 'epoch': 2} {'type': 'loss', 'content': 0.07820628583431244, 'timestamp': '2025-10-01 04:33:41.189126', 'step': 12236, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:41.256985', 'step': 12236, 'epoch': 2} {'type': 'loss', 'content': 0.10113368928432465, 'timestamp': '2025-10-01 04:33:41.258837', 'step': 12237, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:41.315207', 'step': 12237, 'epoch': 2} {'type': 'loss', 'content': 0.0890653058886528, 'timestamp': '2025-10-01 04:33:41.326838', 'step': 12238, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:41.386623', 'step': 12238, 'epoch': 2} {'type': 'loss', 'content': 0.11836380511522293, 'timestamp': '2025-10-01 04:33:41.389472', 'step': 12239, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:41.449968', 'step': 12239, 'epoch': 2} {'type': 'loss', 'content': 0.12954138219356537, 'timestamp': '2025-10-01 04:33:41.456962', 'step': 12240, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:33:41.515421', 'step': 12240, 'epoch': 2} {'type': 'loss', 'content': 0.22245335578918457, 'timestamp': '2025-10-01 04:33:41.517972', 'step': 12241, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:41.578354', 'step': 12241, 'epoch': 2} {'type': 'loss', 'content': 0.1756085902452469, 'timestamp': '2025-10-01 04:33:41.586024', 'step': 12242, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:41.646678', 'step': 12242, 'epoch': 2} {'type': 'loss', 'content': 0.08415818214416504, 'timestamp': '2025-10-01 04:33:41.649073', 'step': 12243, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:41.704986', 'step': 12243, 'epoch': 2} {'type': 'loss', 'content': 0.2177215814590454, 'timestamp': '2025-10-01 04:33:41.714453', 'step': 12244, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:41.771846', 'step': 12244, 'epoch': 2} {'type': 'loss', 'content': 0.0929628312587738, 'timestamp': '2025-10-01 04:33:41.774138', 'step': 12245, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:41.829561', 'step': 12245, 'epoch': 2} {'type': 'loss', 'content': 0.13099592924118042, 'timestamp': '2025-10-01 04:33:41.832189', 'step': 12246, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:41.907508', 'step': 12246, 'epoch': 2} {'type': 'loss', 'content': 0.0772511437535286, 'timestamp': '2025-10-01 04:33:41.909685', 'step': 12247, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:41.973457', 'step': 12247, 'epoch': 2} {'type': 'loss', 'content': 0.08488590270280838, 'timestamp': '2025-10-01 04:33:41.979598', 'step': 12248, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:42.033722', 'step': 12248, 'epoch': 2} {'type': 'loss', 'content': 0.13207314908504486, 'timestamp': '2025-10-01 04:33:42.035819', 'step': 12249, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:42.090142', 'step': 12249, 'epoch': 2} {'type': 'loss', 'content': 0.12280569225549698, 'timestamp': '2025-10-01 04:33:42.091996', 'step': 12250, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:42.145265', 'step': 12250, 'epoch': 2} {'type': 'loss', 'content': 0.07749710977077484, 'timestamp': '2025-10-01 04:33:42.148131', 'step': 12251, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:42.201698', 'step': 12251, 'epoch': 2} {'type': 'loss', 'content': 0.23132836818695068, 'timestamp': '2025-10-01 04:33:42.207572', 'step': 12252, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:42.261186', 'step': 12252, 'epoch': 2} {'type': 'loss', 'content': 0.22068126499652863, 'timestamp': '2025-10-01 04:33:42.263286', 'step': 12253, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:42.316986', 'step': 12253, 'epoch': 2} {'type': 'loss', 'content': 0.11504504829645157, 'timestamp': '2025-10-01 04:33:42.319420', 'step': 12254, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:42.373703', 'step': 12254, 'epoch': 2} {'type': 'loss', 'content': 0.12379948049783707, 'timestamp': '2025-10-01 04:33:42.375750', 'step': 12255, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:42.430202', 'step': 12255, 'epoch': 2} {'type': 'loss', 'content': 0.08945104479789734, 'timestamp': '2025-10-01 04:33:42.435927', 'step': 12256, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:33:42.489444', 'step': 12256, 'epoch': 2} {'type': 'loss', 'content': 0.14832229912281036, 'timestamp': '2025-10-01 04:33:42.491170', 'step': 12257, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:42.544539', 'step': 12257, 'epoch': 2} {'type': 'loss', 'content': 0.1471240222454071, 'timestamp': '2025-10-01 04:33:42.546643', 'step': 12258, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:42.600292', 'step': 12258, 'epoch': 2} {'type': 'loss', 'content': 0.09507229924201965, 'timestamp': '2025-10-01 04:33:42.602479', 'step': 12259, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:42.655832', 'step': 12259, 'epoch': 2} {'type': 'loss', 'content': 0.09079156816005707, 'timestamp': '2025-10-01 04:33:42.661836', 'step': 12260, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:42.716681', 'step': 12260, 'epoch': 2} {'type': 'loss', 'content': 0.09853986650705338, 'timestamp': '2025-10-01 04:33:42.718784', 'step': 12261, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:42.772710', 'step': 12261, 'epoch': 2} {'type': 'loss', 'content': 0.14025388658046722, 'timestamp': '2025-10-01 04:33:42.774612', 'step': 12262, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:42.828015', 'step': 12262, 'epoch': 2} {'type': 'loss', 'content': 0.12197122722864151, 'timestamp': '2025-10-01 04:33:42.829796', 'step': 12263, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:42.883308', 'step': 12263, 'epoch': 2} {'type': 'loss', 'content': 0.16568975150585175, 'timestamp': '2025-10-01 04:33:42.889110', 'step': 12264, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:42.942615', 'step': 12264, 'epoch': 2} {'type': 'loss', 'content': 0.24806910753250122, 'timestamp': '2025-10-01 04:33:42.944473', 'step': 12265, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:42.998165', 'step': 12265, 'epoch': 2} {'type': 'loss', 'content': 0.13096800446510315, 'timestamp': '2025-10-01 04:33:43.000075', 'step': 12266, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:43.054410', 'step': 12266, 'epoch': 2} {'type': 'loss', 'content': 0.0663129985332489, 'timestamp': '2025-10-01 04:33:43.056452', 'step': 12267, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:43.110335', 'step': 12267, 'epoch': 2} {'type': 'loss', 'content': 0.16750437021255493, 'timestamp': '2025-10-01 04:33:43.124731', 'step': 12268, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:43.178005', 'step': 12268, 'epoch': 2} {'type': 'loss', 'content': 0.10359175503253937, 'timestamp': '2025-10-01 04:33:43.180334', 'step': 12269, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:33:43.233910', 'step': 12269, 'epoch': 2} {'type': 'loss', 'content': 0.1768009066581726, 'timestamp': '2025-10-01 04:33:43.235778', 'step': 12270, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:43.289557', 'step': 12270, 'epoch': 2} {'type': 'loss', 'content': 0.09508636593818665, 'timestamp': '2025-10-01 04:33:43.291881', 'step': 12271, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:43.345273', 'step': 12271, 'epoch': 2} {'type': 'loss', 'content': 0.15207450091838837, 'timestamp': '2025-10-01 04:33:43.351296', 'step': 12272, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:43.405270', 'step': 12272, 'epoch': 2} {'type': 'loss', 'content': 0.21259216964244843, 'timestamp': '2025-10-01 04:33:43.408590', 'step': 12273, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:43.463450', 'step': 12273, 'epoch': 2} {'type': 'loss', 'content': 0.1592792570590973, 'timestamp': '2025-10-01 04:33:43.465563', 'step': 12274, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:43.520529', 'step': 12274, 'epoch': 2} {'type': 'loss', 'content': 0.07158654183149338, 'timestamp': '2025-10-01 04:33:43.522797', 'step': 12275, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:43.588124', 'step': 12275, 'epoch': 2} {'type': 'loss', 'content': 0.10404732078313828, 'timestamp': '2025-10-01 04:33:43.593835', 'step': 12276, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:43.646867', 'step': 12276, 'epoch': 2} {'type': 'loss', 'content': 0.08950299024581909, 'timestamp': '2025-10-01 04:33:43.648932', 'step': 12277, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:43.708488', 'step': 12277, 'epoch': 2} {'type': 'loss', 'content': 0.10964056849479675, 'timestamp': '2025-10-01 04:33:43.713016', 'step': 12278, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:43.768328', 'step': 12278, 'epoch': 2} {'type': 'loss', 'content': 0.06453947722911835, 'timestamp': '2025-10-01 04:33:43.777105', 'step': 12279, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:43.831552', 'step': 12279, 'epoch': 2} {'type': 'loss', 'content': 0.1066623255610466, 'timestamp': '2025-10-01 04:33:43.837668', 'step': 12280, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:43.896231', 'step': 12280, 'epoch': 2} {'type': 'loss', 'content': 0.0907544493675232, 'timestamp': '2025-10-01 04:33:43.898529', 'step': 12281, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:43.952542', 'step': 12281, 'epoch': 2} {'type': 'loss', 'content': 0.19053016602993011, 'timestamp': '2025-10-01 04:33:43.959158', 'step': 12282, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:33:44.013752', 'step': 12282, 'epoch': 2} {'type': 'loss', 'content': 0.03899483382701874, 'timestamp': '2025-10-01 04:33:44.016226', 'step': 12283, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:44.070356', 'step': 12283, 'epoch': 2} {'type': 'loss', 'content': 0.06319200992584229, 'timestamp': '2025-10-01 04:33:44.076587', 'step': 12284, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:44.130212', 'step': 12284, 'epoch': 2} {'type': 'loss', 'content': 0.06129364296793938, 'timestamp': '2025-10-01 04:33:44.132662', 'step': 12285, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:44.190867', 'step': 12285, 'epoch': 2} {'type': 'loss', 'content': 0.07990958541631699, 'timestamp': '2025-10-01 04:33:44.193832', 'step': 12286, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:44.247755', 'step': 12286, 'epoch': 2} {'type': 'loss', 'content': 0.07248202711343765, 'timestamp': '2025-10-01 04:33:44.250071', 'step': 12287, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:44.303646', 'step': 12287, 'epoch': 2} {'type': 'loss', 'content': 0.07760424166917801, 'timestamp': '2025-10-01 04:33:44.310428', 'step': 12288, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:44.367660', 'step': 12288, 'epoch': 2} {'type': 'loss', 'content': 0.09227990359067917, 'timestamp': '2025-10-01 04:33:44.370149', 'step': 12289, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:44.430033', 'step': 12289, 'epoch': 2} {'type': 'loss', 'content': 0.11470480263233185, 'timestamp': '2025-10-01 04:33:44.434445', 'step': 12290, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:44.493628', 'step': 12290, 'epoch': 2} {'type': 'loss', 'content': 0.11936168372631073, 'timestamp': '2025-10-01 04:33:44.496000', 'step': 12291, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:44.555865', 'step': 12291, 'epoch': 2} {'type': 'loss', 'content': 0.14730162918567657, 'timestamp': '2025-10-01 04:33:44.561482', 'step': 12292, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:33:44.614341', 'step': 12292, 'epoch': 2} {'type': 'loss', 'content': 0.07486362755298615, 'timestamp': '2025-10-01 04:33:44.616407', 'step': 12293, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:44.669580', 'step': 12293, 'epoch': 2} {'type': 'loss', 'content': 0.11200907826423645, 'timestamp': '2025-10-01 04:33:44.672874', 'step': 12294, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:44.726296', 'step': 12294, 'epoch': 2} {'type': 'loss', 'content': 0.07802063971757889, 'timestamp': '2025-10-01 04:33:44.728537', 'step': 12295, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:44.782107', 'step': 12295, 'epoch': 2} {'type': 'loss', 'content': 0.13178561627864838, 'timestamp': '2025-10-01 04:33:44.787940', 'step': 12296, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:44.841240', 'step': 12296, 'epoch': 2} {'type': 'loss', 'content': 0.044439807534217834, 'timestamp': '2025-10-01 04:33:44.843370', 'step': 12297, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:44.896926', 'step': 12297, 'epoch': 2} {'type': 'loss', 'content': 0.11654490977525711, 'timestamp': '2025-10-01 04:33:44.899191', 'step': 12298, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:44.952624', 'step': 12298, 'epoch': 2} {'type': 'loss', 'content': 0.19019728899002075, 'timestamp': '2025-10-01 04:33:44.954574', 'step': 12299, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:45.007854', 'step': 12299, 'epoch': 2} {'type': 'loss', 'content': 0.0955481305718422, 'timestamp': '2025-10-01 04:33:45.018123', 'step': 12300, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:45.071326', 'step': 12300, 'epoch': 2} {'type': 'loss', 'content': 0.1253882348537445, 'timestamp': '2025-10-01 04:33:45.073521', 'step': 12301, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:45.127295', 'step': 12301, 'epoch': 2} {'type': 'loss', 'content': 0.12384394556283951, 'timestamp': '2025-10-01 04:33:45.129594', 'step': 12302, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:45.184183', 'step': 12302, 'epoch': 2} {'type': 'loss', 'content': 0.18388156592845917, 'timestamp': '2025-10-01 04:33:45.186831', 'step': 12303, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:45.239820', 'step': 12303, 'epoch': 2} {'type': 'loss', 'content': 0.08937036246061325, 'timestamp': '2025-10-01 04:33:45.245891', 'step': 12304, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:45.299481', 'step': 12304, 'epoch': 2} {'type': 'loss', 'content': 0.10366291552782059, 'timestamp': '2025-10-01 04:33:45.301796', 'step': 12305, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:45.355923', 'step': 12305, 'epoch': 2} {'type': 'loss', 'content': 0.06617792695760727, 'timestamp': '2025-10-01 04:33:45.358399', 'step': 12306, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:45.417034', 'step': 12306, 'epoch': 2} {'type': 'loss', 'content': 0.12883473932743073, 'timestamp': '2025-10-01 04:33:45.419707', 'step': 12307, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:45.473387', 'step': 12307, 'epoch': 2} {'type': 'loss', 'content': 0.11613566428422928, 'timestamp': '2025-10-01 04:33:45.479035', 'step': 12308, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:45.532049', 'step': 12308, 'epoch': 2} {'type': 'loss', 'content': 0.18388143181800842, 'timestamp': '2025-10-01 04:33:45.534217', 'step': 12309, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:45.590035', 'step': 12309, 'epoch': 2} {'type': 'loss', 'content': 0.13013856112957, 'timestamp': '2025-10-01 04:33:45.592343', 'step': 12310, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:45.645790', 'step': 12310, 'epoch': 2} {'type': 'loss', 'content': 0.10735555738210678, 'timestamp': '2025-10-01 04:33:45.647860', 'step': 12311, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:45.701678', 'step': 12311, 'epoch': 2} {'type': 'loss', 'content': 0.1269913911819458, 'timestamp': '2025-10-01 04:33:45.707477', 'step': 12312, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:45.760322', 'step': 12312, 'epoch': 2} {'type': 'loss', 'content': 0.08578196913003922, 'timestamp': '2025-10-01 04:33:45.762586', 'step': 12313, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:45.816276', 'step': 12313, 'epoch': 2} {'type': 'loss', 'content': 0.10864419490098953, 'timestamp': '2025-10-01 04:33:45.818484', 'step': 12314, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:45.872302', 'step': 12314, 'epoch': 2} {'type': 'loss', 'content': 0.12368258088827133, 'timestamp': '2025-10-01 04:33:45.874670', 'step': 12315, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:45.928253', 'step': 12315, 'epoch': 2} {'type': 'loss', 'content': 0.1497713327407837, 'timestamp': '2025-10-01 04:33:45.934288', 'step': 12316, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:33:45.987122', 'step': 12316, 'epoch': 2} {'type': 'loss', 'content': 0.05607036128640175, 'timestamp': '2025-10-01 04:33:45.996885', 'step': 12317, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:46.050156', 'step': 12317, 'epoch': 2} {'type': 'loss', 'content': 0.27474188804626465, 'timestamp': '2025-10-01 04:33:46.052052', 'step': 12318, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:46.106797', 'step': 12318, 'epoch': 2} {'type': 'loss', 'content': 0.13525989651679993, 'timestamp': '2025-10-01 04:33:46.109225', 'step': 12319, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:46.163558', 'step': 12319, 'epoch': 2} {'type': 'loss', 'content': 0.08636899292469025, 'timestamp': '2025-10-01 04:33:46.169372', 'step': 12320, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:46.233537', 'step': 12320, 'epoch': 2} {'type': 'loss', 'content': 0.18203239142894745, 'timestamp': '2025-10-01 04:33:46.236144', 'step': 12321, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:46.289347', 'step': 12321, 'epoch': 2} {'type': 'loss', 'content': 0.09719513356685638, 'timestamp': '2025-10-01 04:33:46.291586', 'step': 12322, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:46.345133', 'step': 12322, 'epoch': 2} {'type': 'loss', 'content': 0.2163229137659073, 'timestamp': '2025-10-01 04:33:46.347780', 'step': 12323, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:46.401589', 'step': 12323, 'epoch': 2} {'type': 'loss', 'content': 0.049261096864938736, 'timestamp': '2025-10-01 04:33:46.407611', 'step': 12324, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:46.460657', 'step': 12324, 'epoch': 2} {'type': 'loss', 'content': 0.10123881697654724, 'timestamp': '2025-10-01 04:33:46.462890', 'step': 12325, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:33:46.516553', 'step': 12325, 'epoch': 2} {'type': 'loss', 'content': 0.1584898680448532, 'timestamp': '2025-10-01 04:33:46.518558', 'step': 12326, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:46.573085', 'step': 12326, 'epoch': 2} {'type': 'loss', 'content': 0.13626506924629211, 'timestamp': '2025-10-01 04:33:46.575202', 'step': 12327, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:46.632913', 'step': 12327, 'epoch': 2} {'type': 'loss', 'content': 0.15248075127601624, 'timestamp': '2025-10-01 04:33:46.640056', 'step': 12328, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:46.694036', 'step': 12328, 'epoch': 2} {'type': 'loss', 'content': 0.23793719708919525, 'timestamp': '2025-10-01 04:33:46.696159', 'step': 12329, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:46.757825', 'step': 12329, 'epoch': 2} {'type': 'loss', 'content': 0.15062853693962097, 'timestamp': '2025-10-01 04:33:46.760168', 'step': 12330, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:46.813851', 'step': 12330, 'epoch': 2} {'type': 'loss', 'content': 0.0921642854809761, 'timestamp': '2025-10-01 04:33:46.817239', 'step': 12331, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:46.871233', 'step': 12331, 'epoch': 2} {'type': 'loss', 'content': 0.06541469693183899, 'timestamp': '2025-10-01 04:33:46.877574', 'step': 12332, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:46.930563', 'step': 12332, 'epoch': 2} {'type': 'loss', 'content': 0.08667509257793427, 'timestamp': '2025-10-01 04:33:46.933026', 'step': 12333, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:46.986277', 'step': 12333, 'epoch': 2} {'type': 'loss', 'content': 0.12786564230918884, 'timestamp': '2025-10-01 04:33:46.988922', 'step': 12334, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:47.043324', 'step': 12334, 'epoch': 2} {'type': 'loss', 'content': 0.13885018229484558, 'timestamp': '2025-10-01 04:33:47.045690', 'step': 12335, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:47.099559', 'step': 12335, 'epoch': 2} {'type': 'loss', 'content': 0.11345566064119339, 'timestamp': '2025-10-01 04:33:47.109828', 'step': 12336, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:47.172106', 'step': 12336, 'epoch': 2} {'type': 'loss', 'content': 0.12580226361751556, 'timestamp': '2025-10-01 04:33:47.174238', 'step': 12337, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:47.228447', 'step': 12337, 'epoch': 2} {'type': 'loss', 'content': 0.0809401348233223, 'timestamp': '2025-10-01 04:33:47.230553', 'step': 12338, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:47.288317', 'step': 12338, 'epoch': 2} {'type': 'loss', 'content': 0.23655083775520325, 'timestamp': '2025-10-01 04:33:47.291332', 'step': 12339, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:47.344834', 'step': 12339, 'epoch': 2} {'type': 'loss', 'content': 0.11483578383922577, 'timestamp': '2025-10-01 04:33:47.350572', 'step': 12340, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:47.404419', 'step': 12340, 'epoch': 2} {'type': 'loss', 'content': 0.17818216979503632, 'timestamp': '2025-10-01 04:33:47.406428', 'step': 12341, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:47.459529', 'step': 12341, 'epoch': 2} {'type': 'loss', 'content': 0.1230076476931572, 'timestamp': '2025-10-01 04:33:47.461812', 'step': 12342, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:47.517976', 'step': 12342, 'epoch': 2} {'type': 'loss', 'content': 0.08039705455303192, 'timestamp': '2025-10-01 04:33:47.520282', 'step': 12343, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:47.584504', 'step': 12343, 'epoch': 2} {'type': 'loss', 'content': 0.14527978003025055, 'timestamp': '2025-10-01 04:33:47.597565', 'step': 12344, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:47.651486', 'step': 12344, 'epoch': 2} {'type': 'loss', 'content': 0.12616200745105743, 'timestamp': '2025-10-01 04:33:47.653602', 'step': 12345, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:47.707292', 'step': 12345, 'epoch': 2} {'type': 'loss', 'content': 0.16539685428142548, 'timestamp': '2025-10-01 04:33:47.709363', 'step': 12346, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:47.773183', 'step': 12346, 'epoch': 2} {'type': 'loss', 'content': 0.1240275427699089, 'timestamp': '2025-10-01 04:33:47.775713', 'step': 12347, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:47.832229', 'step': 12347, 'epoch': 2} {'type': 'loss', 'content': 0.10227806866168976, 'timestamp': '2025-10-01 04:33:47.838399', 'step': 12348, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:47.892850', 'step': 12348, 'epoch': 2} {'type': 'loss', 'content': 0.1678851842880249, 'timestamp': '2025-10-01 04:33:47.894918', 'step': 12349, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:47.949118', 'step': 12349, 'epoch': 2} {'type': 'loss', 'content': 0.16221927106380463, 'timestamp': '2025-10-01 04:33:47.951273', 'step': 12350, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:48.005416', 'step': 12350, 'epoch': 2} {'type': 'loss', 'content': 0.15252335369586945, 'timestamp': '2025-10-01 04:33:48.007530', 'step': 12351, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:48.061726', 'step': 12351, 'epoch': 2} {'type': 'loss', 'content': 0.08695606142282486, 'timestamp': '2025-10-01 04:33:48.067945', 'step': 12352, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:48.120994', 'step': 12352, 'epoch': 2} {'type': 'loss', 'content': 0.16199544072151184, 'timestamp': '2025-10-01 04:33:48.122834', 'step': 12353, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:48.175915', 'step': 12353, 'epoch': 2} {'type': 'loss', 'content': 0.1586412638425827, 'timestamp': '2025-10-01 04:33:48.179917', 'step': 12354, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:48.233206', 'step': 12354, 'epoch': 2} {'type': 'loss', 'content': 0.2136470526456833, 'timestamp': '2025-10-01 04:33:48.235422', 'step': 12355, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:48.289689', 'step': 12355, 'epoch': 2} {'type': 'loss', 'content': 0.07155315577983856, 'timestamp': '2025-10-01 04:33:48.295482', 'step': 12356, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:33:48.360489', 'step': 12356, 'epoch': 2} {'type': 'loss', 'content': 0.12142965942621231, 'timestamp': '2025-10-01 04:33:48.362905', 'step': 12357, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:48.416298', 'step': 12357, 'epoch': 2} {'type': 'loss', 'content': 0.06413590908050537, 'timestamp': '2025-10-01 04:33:48.418557', 'step': 12358, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:48.473296', 'step': 12358, 'epoch': 2} {'type': 'loss', 'content': 0.14954137802124023, 'timestamp': '2025-10-01 04:33:48.475532', 'step': 12359, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:48.530316', 'step': 12359, 'epoch': 2} {'type': 'loss', 'content': 0.16694359481334686, 'timestamp': '2025-10-01 04:33:48.536296', 'step': 12360, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:48.598698', 'step': 12360, 'epoch': 2} {'type': 'loss', 'content': 0.10984638333320618, 'timestamp': '2025-10-01 04:33:48.600879', 'step': 12361, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:48.654213', 'step': 12361, 'epoch': 2} {'type': 'loss', 'content': 0.16929806768894196, 'timestamp': '2025-10-01 04:33:48.656293', 'step': 12362, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:48.709712', 'step': 12362, 'epoch': 2} {'type': 'loss', 'content': 0.09462852030992508, 'timestamp': '2025-10-01 04:33:48.712220', 'step': 12363, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:48.766004', 'step': 12363, 'epoch': 2} {'type': 'loss', 'content': 0.09613405913114548, 'timestamp': '2025-10-01 04:33:48.772195', 'step': 12364, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:48.828787', 'step': 12364, 'epoch': 2} {'type': 'loss', 'content': 0.1016896441578865, 'timestamp': '2025-10-01 04:33:48.842588', 'step': 12365, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:48.901998', 'step': 12365, 'epoch': 2} {'type': 'loss', 'content': 0.10962630063295364, 'timestamp': '2025-10-01 04:33:48.904246', 'step': 12366, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:48.958905', 'step': 12366, 'epoch': 2} {'type': 'loss', 'content': 0.11776666343212128, 'timestamp': '2025-10-01 04:33:48.961452', 'step': 12367, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:49.016355', 'step': 12367, 'epoch': 2} {'type': 'loss', 'content': 0.1226242333650589, 'timestamp': '2025-10-01 04:33:49.022775', 'step': 12368, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:49.078485', 'step': 12368, 'epoch': 2} {'type': 'loss', 'content': 0.10790357738733292, 'timestamp': '2025-10-01 04:33:49.084265', 'step': 12369, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:49.138334', 'step': 12369, 'epoch': 2} {'type': 'loss', 'content': 0.09250953793525696, 'timestamp': '2025-10-01 04:33:49.141535', 'step': 12370, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:49.196427', 'step': 12370, 'epoch': 2} {'type': 'loss', 'content': 0.15493042767047882, 'timestamp': '2025-10-01 04:33:49.199044', 'step': 12371, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:49.270519', 'step': 12371, 'epoch': 2} {'type': 'loss', 'content': 0.1452566385269165, 'timestamp': '2025-10-01 04:33:49.276759', 'step': 12372, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:49.331506', 'step': 12372, 'epoch': 2} {'type': 'loss', 'content': 0.08047961443662643, 'timestamp': '2025-10-01 04:33:49.334072', 'step': 12373, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:49.395399', 'step': 12373, 'epoch': 2} {'type': 'loss', 'content': 0.1733240783214569, 'timestamp': '2025-10-01 04:33:49.398325', 'step': 12374, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:49.453560', 'step': 12374, 'epoch': 2} {'type': 'loss', 'content': 0.16244883835315704, 'timestamp': '2025-10-01 04:33:49.456212', 'step': 12375, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:49.511632', 'step': 12375, 'epoch': 2} {'type': 'loss', 'content': 0.1112331673502922, 'timestamp': '2025-10-01 04:33:49.517467', 'step': 12376, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:49.571581', 'step': 12376, 'epoch': 2} {'type': 'loss', 'content': 0.11281765252351761, 'timestamp': '2025-10-01 04:33:49.573671', 'step': 12377, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:49.630873', 'step': 12377, 'epoch': 2} {'type': 'loss', 'content': 0.10076379030942917, 'timestamp': '2025-10-01 04:33:49.633360', 'step': 12378, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:49.689414', 'step': 12378, 'epoch': 2} {'type': 'loss', 'content': 0.07496552169322968, 'timestamp': '2025-10-01 04:33:49.691775', 'step': 12379, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:49.746201', 'step': 12379, 'epoch': 2} {'type': 'loss', 'content': 0.11620049923658371, 'timestamp': '2025-10-01 04:33:49.753460', 'step': 12380, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:49.807264', 'step': 12380, 'epoch': 2} {'type': 'loss', 'content': 0.1055959090590477, 'timestamp': '2025-10-01 04:33:49.813672', 'step': 12381, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:49.869533', 'step': 12381, 'epoch': 2} {'type': 'loss', 'content': 0.08664125204086304, 'timestamp': '2025-10-01 04:33:49.872151', 'step': 12382, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:49.927267', 'step': 12382, 'epoch': 2} {'type': 'loss', 'content': 0.1792639195919037, 'timestamp': '2025-10-01 04:33:49.929886', 'step': 12383, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:49.998913', 'step': 12383, 'epoch': 2} {'type': 'loss', 'content': 0.10472463816404343, 'timestamp': '2025-10-01 04:33:50.012271', 'step': 12384, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:50.067209', 'step': 12384, 'epoch': 2} {'type': 'loss', 'content': 0.12684957683086395, 'timestamp': '2025-10-01 04:33:50.073297', 'step': 12385, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:50.139688', 'step': 12385, 'epoch': 2} {'type': 'loss', 'content': 0.1254366934299469, 'timestamp': '2025-10-01 04:33:50.142835', 'step': 12386, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:50.200951', 'step': 12386, 'epoch': 2} {'type': 'loss', 'content': 0.26182281970977783, 'timestamp': '2025-10-01 04:33:50.203754', 'step': 12387, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:50.259411', 'step': 12387, 'epoch': 2} {'type': 'loss', 'content': 0.11359995603561401, 'timestamp': '2025-10-01 04:33:50.265444', 'step': 12388, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:50.321191', 'step': 12388, 'epoch': 2} {'type': 'loss', 'content': 0.16570712625980377, 'timestamp': '2025-10-01 04:33:50.323887', 'step': 12389, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:50.379390', 'step': 12389, 'epoch': 2} {'type': 'loss', 'content': 0.10082308202981949, 'timestamp': '2025-10-01 04:33:50.386881', 'step': 12390, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:50.444690', 'step': 12390, 'epoch': 2} {'type': 'loss', 'content': 0.0852411612868309, 'timestamp': '2025-10-01 04:33:50.453320', 'step': 12391, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:50.508761', 'step': 12391, 'epoch': 2} {'type': 'loss', 'content': 0.14912062883377075, 'timestamp': '2025-10-01 04:33:50.515411', 'step': 12392, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:50.577755', 'step': 12392, 'epoch': 2} {'type': 'loss', 'content': 0.05480964854359627, 'timestamp': '2025-10-01 04:33:50.580868', 'step': 12393, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:50.640726', 'step': 12393, 'epoch': 2} {'type': 'loss', 'content': 0.1689511239528656, 'timestamp': '2025-10-01 04:33:50.643352', 'step': 12394, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:50.698942', 'step': 12394, 'epoch': 2} {'type': 'loss', 'content': 0.11234911531209946, 'timestamp': '2025-10-01 04:33:50.701041', 'step': 12395, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:50.758611', 'step': 12395, 'epoch': 2} {'type': 'loss', 'content': 0.14815884828567505, 'timestamp': '2025-10-01 04:33:50.765349', 'step': 12396, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:50.818285', 'step': 12396, 'epoch': 2} {'type': 'loss', 'content': 0.14388687908649445, 'timestamp': '2025-10-01 04:33:50.820583', 'step': 12397, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:50.874650', 'step': 12397, 'epoch': 2} {'type': 'loss', 'content': 0.13768020272254944, 'timestamp': '2025-10-01 04:33:50.876468', 'step': 12398, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:50.930358', 'step': 12398, 'epoch': 2} {'type': 'loss', 'content': 0.13610106706619263, 'timestamp': '2025-10-01 04:33:50.932598', 'step': 12399, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:50.986013', 'step': 12399, 'epoch': 2} {'type': 'loss', 'content': 0.07896268367767334, 'timestamp': '2025-10-01 04:33:50.991907', 'step': 12400, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:51.044548', 'step': 12400, 'epoch': 2} {'type': 'loss', 'content': 0.10289779305458069, 'timestamp': '2025-10-01 04:33:51.046799', 'step': 12401, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:51.100632', 'step': 12401, 'epoch': 2} {'type': 'loss', 'content': 0.1858038306236267, 'timestamp': '2025-10-01 04:33:51.103116', 'step': 12402, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:51.157294', 'step': 12402, 'epoch': 2} {'type': 'loss', 'content': 0.0898839682340622, 'timestamp': '2025-10-01 04:33:51.159881', 'step': 12403, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:51.220258', 'step': 12403, 'epoch': 2} {'type': 'loss', 'content': 0.25248396396636963, 'timestamp': '2025-10-01 04:33:51.226071', 'step': 12404, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:51.280470', 'step': 12404, 'epoch': 2} {'type': 'loss', 'content': 0.1362317055463791, 'timestamp': '2025-10-01 04:33:51.282601', 'step': 12405, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:33:51.338122', 'step': 12405, 'epoch': 2} {'type': 'loss', 'content': 0.2635629177093506, 'timestamp': '2025-10-01 04:33:51.340005', 'step': 12406, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:51.394021', 'step': 12406, 'epoch': 2} {'type': 'loss', 'content': 0.15169253945350647, 'timestamp': '2025-10-01 04:33:51.396266', 'step': 12407, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:51.449852', 'step': 12407, 'epoch': 2} {'type': 'loss', 'content': 0.12005684524774551, 'timestamp': '2025-10-01 04:33:51.455713', 'step': 12408, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:51.509497', 'step': 12408, 'epoch': 2} {'type': 'loss', 'content': 0.1651378870010376, 'timestamp': '2025-10-01 04:33:51.511551', 'step': 12409, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:51.564669', 'step': 12409, 'epoch': 2} {'type': 'loss', 'content': 0.0684807077050209, 'timestamp': '2025-10-01 04:33:51.567005', 'step': 12410, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:51.620895', 'step': 12410, 'epoch': 2} {'type': 'loss', 'content': 0.07101444900035858, 'timestamp': '2025-10-01 04:33:51.623157', 'step': 12411, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:51.681073', 'step': 12411, 'epoch': 2} {'type': 'loss', 'content': 0.10668734461069107, 'timestamp': '2025-10-01 04:33:51.686895', 'step': 12412, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:51.739912', 'step': 12412, 'epoch': 2} {'type': 'loss', 'content': 0.12321512401103973, 'timestamp': '2025-10-01 04:33:51.742132', 'step': 12413, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:51.796408', 'step': 12413, 'epoch': 2} {'type': 'loss', 'content': 0.12000396847724915, 'timestamp': '2025-10-01 04:33:51.798835', 'step': 12414, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:51.852843', 'step': 12414, 'epoch': 2} {'type': 'loss', 'content': 0.11411365866661072, 'timestamp': '2025-10-01 04:33:51.855342', 'step': 12415, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:51.909338', 'step': 12415, 'epoch': 2} {'type': 'loss', 'content': 0.15423966944217682, 'timestamp': '2025-10-01 04:33:51.915134', 'step': 12416, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:33:51.968118', 'step': 12416, 'epoch': 2} {'type': 'loss', 'content': 0.10359519720077515, 'timestamp': '2025-10-01 04:33:51.970143', 'step': 12417, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:52.024145', 'step': 12417, 'epoch': 2} {'type': 'loss', 'content': 0.11065424233675003, 'timestamp': '2025-10-01 04:33:52.026440', 'step': 12418, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:52.080790', 'step': 12418, 'epoch': 2} {'type': 'loss', 'content': 0.18941061198711395, 'timestamp': '2025-10-01 04:33:52.083219', 'step': 12419, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:52.136811', 'step': 12419, 'epoch': 2} {'type': 'loss', 'content': 0.14274653792381287, 'timestamp': '2025-10-01 04:33:52.142981', 'step': 12420, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:52.196192', 'step': 12420, 'epoch': 2} {'type': 'loss', 'content': 0.1080225482583046, 'timestamp': '2025-10-01 04:33:52.198332', 'step': 12421, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:52.251746', 'step': 12421, 'epoch': 2} {'type': 'loss', 'content': 0.17240694165229797, 'timestamp': '2025-10-01 04:33:52.253954', 'step': 12422, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:52.308362', 'step': 12422, 'epoch': 2} {'type': 'loss', 'content': 0.09819598495960236, 'timestamp': '2025-10-01 04:33:52.311722', 'step': 12423, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:52.364558', 'step': 12423, 'epoch': 2} {'type': 'loss', 'content': 0.11124670505523682, 'timestamp': '2025-10-01 04:33:52.370424', 'step': 12424, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:52.424290', 'step': 12424, 'epoch': 2} {'type': 'loss', 'content': 0.08565052598714828, 'timestamp': '2025-10-01 04:33:52.426215', 'step': 12425, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:52.480111', 'step': 12425, 'epoch': 2} {'type': 'loss', 'content': 0.15915600955486298, 'timestamp': '2025-10-01 04:33:52.482161', 'step': 12426, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:52.535683', 'step': 12426, 'epoch': 2} {'type': 'loss', 'content': 0.1637398600578308, 'timestamp': '2025-10-01 04:33:52.539111', 'step': 12427, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:52.595194', 'step': 12427, 'epoch': 2} {'type': 'loss', 'content': 0.03951653838157654, 'timestamp': '2025-10-01 04:33:52.601555', 'step': 12428, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:33:52.655220', 'step': 12428, 'epoch': 2} {'type': 'loss', 'content': 0.2065584510564804, 'timestamp': '2025-10-01 04:33:52.657551', 'step': 12429, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:52.713212', 'step': 12429, 'epoch': 2} {'type': 'loss', 'content': 0.12501074373722076, 'timestamp': '2025-10-01 04:33:52.715532', 'step': 12430, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:52.771469', 'step': 12430, 'epoch': 2} {'type': 'loss', 'content': 0.12922920286655426, 'timestamp': '2025-10-01 04:33:52.774215', 'step': 12431, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:52.840225', 'step': 12431, 'epoch': 2} {'type': 'loss', 'content': 0.03617642819881439, 'timestamp': '2025-10-01 04:33:52.847902', 'step': 12432, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:52.903455', 'step': 12432, 'epoch': 2} {'type': 'loss', 'content': 0.10849392414093018, 'timestamp': '2025-10-01 04:33:52.906008', 'step': 12433, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:52.962001', 'step': 12433, 'epoch': 2} {'type': 'loss', 'content': 0.1437290459871292, 'timestamp': '2025-10-01 04:33:52.965143', 'step': 12434, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:53.020020', 'step': 12434, 'epoch': 2} {'type': 'loss', 'content': 0.06316541135311127, 'timestamp': '2025-10-01 04:33:53.023452', 'step': 12435, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:33:53.085421', 'step': 12435, 'epoch': 2} {'type': 'loss', 'content': 0.09393325448036194, 'timestamp': '2025-10-01 04:33:53.091734', 'step': 12436, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:53.146519', 'step': 12436, 'epoch': 2} {'type': 'loss', 'content': 0.1317117065191269, 'timestamp': '2025-10-01 04:33:53.148599', 'step': 12437, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:33:53.202697', 'step': 12437, 'epoch': 2} {'type': 'loss', 'content': 0.10455943644046783, 'timestamp': '2025-10-01 04:33:53.204353', 'step': 12438, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:53.273569', 'step': 12438, 'epoch': 2} {'type': 'loss', 'content': 0.22369472682476044, 'timestamp': '2025-10-01 04:33:53.283112', 'step': 12439, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:53.337269', 'step': 12439, 'epoch': 2} {'type': 'loss', 'content': 0.10456811636686325, 'timestamp': '2025-10-01 04:33:53.343202', 'step': 12440, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:53.399565', 'step': 12440, 'epoch': 2} {'type': 'loss', 'content': 0.1016225665807724, 'timestamp': '2025-10-01 04:33:53.401450', 'step': 12441, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:33:53.454441', 'step': 12441, 'epoch': 2} {'type': 'loss', 'content': 0.08447340130805969, 'timestamp': '2025-10-01 04:33:53.456407', 'step': 12442, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:53.510197', 'step': 12442, 'epoch': 2} {'type': 'loss', 'content': 0.08381923288106918, 'timestamp': '2025-10-01 04:33:53.512616', 'step': 12443, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:53.571384', 'step': 12443, 'epoch': 2} {'type': 'loss', 'content': 0.1423155814409256, 'timestamp': '2025-10-01 04:33:53.577298', 'step': 12444, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:53.630993', 'step': 12444, 'epoch': 2} {'type': 'loss', 'content': 0.19066260755062103, 'timestamp': '2025-10-01 04:33:53.633422', 'step': 12445, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:53.686769', 'step': 12445, 'epoch': 2} {'type': 'loss', 'content': 0.15657518804073334, 'timestamp': '2025-10-01 04:33:53.691097', 'step': 12446, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:33:53.744576', 'step': 12446, 'epoch': 2} {'type': 'loss', 'content': 0.12039994448423386, 'timestamp': '2025-10-01 04:33:53.746635', 'step': 12447, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:53.800633', 'step': 12447, 'epoch': 2} {'type': 'loss', 'content': 0.10706824064254761, 'timestamp': '2025-10-01 04:33:53.806389', 'step': 12448, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:53.859247', 'step': 12448, 'epoch': 2} {'type': 'loss', 'content': 0.06698786467313766, 'timestamp': '2025-10-01 04:33:53.866410', 'step': 12449, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:53.937555', 'step': 12449, 'epoch': 2} {'type': 'loss', 'content': 0.09642807394266129, 'timestamp': '2025-10-01 04:33:53.939853', 'step': 12450, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:53.994414', 'step': 12450, 'epoch': 2} {'type': 'loss', 'content': 0.19150595366954803, 'timestamp': '2025-10-01 04:33:53.996542', 'step': 12451, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:33:54.049548', 'step': 12451, 'epoch': 2} {'type': 'loss', 'content': 0.16722296178340912, 'timestamp': '2025-10-01 04:33:54.055267', 'step': 12452, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:54.109187', 'step': 12452, 'epoch': 2} {'type': 'loss', 'content': 0.1063629612326622, 'timestamp': '2025-10-01 04:33:54.111688', 'step': 12453, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:54.182337', 'step': 12453, 'epoch': 2} {'type': 'loss', 'content': 0.23694008588790894, 'timestamp': '2025-10-01 04:33:54.188296', 'step': 12454, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:54.255276', 'step': 12454, 'epoch': 2} {'type': 'loss', 'content': 0.12254578620195389, 'timestamp': '2025-10-01 04:33:54.257464', 'step': 12455, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:54.316846', 'step': 12455, 'epoch': 2} {'type': 'loss', 'content': 0.12337853014469147, 'timestamp': '2025-10-01 04:33:54.334960', 'step': 12456, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:54.389409', 'step': 12456, 'epoch': 2} {'type': 'loss', 'content': 0.09878256916999817, 'timestamp': '2025-10-01 04:33:54.391401', 'step': 12457, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:54.445114', 'step': 12457, 'epoch': 2} {'type': 'loss', 'content': 0.11410240828990936, 'timestamp': '2025-10-01 04:33:54.448545', 'step': 12458, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:54.502717', 'step': 12458, 'epoch': 2} {'type': 'loss', 'content': 0.1997849941253662, 'timestamp': '2025-10-01 04:33:54.505449', 'step': 12459, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:33:54.577284', 'step': 12459, 'epoch': 2} {'type': 'loss', 'content': 0.09887941181659698, 'timestamp': '2025-10-01 04:33:54.584453', 'step': 12460, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:54.641546', 'step': 12460, 'epoch': 2} {'type': 'loss', 'content': 0.1063561886548996, 'timestamp': '2025-10-01 04:33:54.647103', 'step': 12461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:54.702615', 'step': 12461, 'epoch': 2} {'type': 'loss', 'content': 0.0941397175192833, 'timestamp': '2025-10-01 04:33:54.704708', 'step': 12462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:54.762483', 'step': 12462, 'epoch': 2} {'type': 'loss', 'content': 0.13791829347610474, 'timestamp': '2025-10-01 04:33:54.764807', 'step': 12463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:54.820711', 'step': 12463, 'epoch': 2} {'type': 'loss', 'content': 0.11722353845834732, 'timestamp': '2025-10-01 04:33:54.827516', 'step': 12464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:54.882128', 'step': 12464, 'epoch': 2} {'type': 'loss', 'content': 0.11339299380779266, 'timestamp': '2025-10-01 04:33:54.884485', 'step': 12465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:54.940804', 'step': 12465, 'epoch': 2} {'type': 'loss', 'content': 0.13397622108459473, 'timestamp': '2025-10-01 04:33:54.942973', 'step': 12466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:55.000030', 'step': 12466, 'epoch': 2} {'type': 'loss', 'content': 0.08905071765184402, 'timestamp': '2025-10-01 04:33:55.002213', 'step': 12467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:55.057744', 'step': 12467, 'epoch': 2} {'type': 'loss', 'content': 0.09357953816652298, 'timestamp': '2025-10-01 04:33:55.064043', 'step': 12468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:33:55.117626', 'step': 12468, 'epoch': 2} {'type': 'loss', 'content': 0.16045963764190674, 'timestamp': '2025-10-01 04:33:55.119849', 'step': 12469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:55.174004', 'step': 12469, 'epoch': 2} {'type': 'loss', 'content': 0.11824201047420502, 'timestamp': '2025-10-01 04:33:55.176066', 'step': 12470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:55.229395', 'step': 12470, 'epoch': 2} {'type': 'loss', 'content': 0.1860325038433075, 'timestamp': '2025-10-01 04:33:55.234951', 'step': 12471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:55.289533', 'step': 12471, 'epoch': 2} {'type': 'loss', 'content': 0.1318138837814331, 'timestamp': '2025-10-01 04:33:55.295569', 'step': 12472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:55.350329', 'step': 12472, 'epoch': 2} {'type': 'loss', 'content': 0.13272131979465485, 'timestamp': '2025-10-01 04:33:55.353236', 'step': 12473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:55.408831', 'step': 12473, 'epoch': 2} {'type': 'loss', 'content': 0.1228884607553482, 'timestamp': '2025-10-01 04:33:55.410991', 'step': 12474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:55.466622', 'step': 12474, 'epoch': 2} {'type': 'loss', 'content': 0.07219548523426056, 'timestamp': '2025-10-01 04:33:55.469552', 'step': 12475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:55.523749', 'step': 12475, 'epoch': 2} {'type': 'loss', 'content': 0.1154874935746193, 'timestamp': '2025-10-01 04:33:55.530381', 'step': 12476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:55.598466', 'step': 12476, 'epoch': 2} {'type': 'loss', 'content': 0.18720273673534393, 'timestamp': '2025-10-01 04:33:55.600624', 'step': 12477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:55.654018', 'step': 12477, 'epoch': 2} {'type': 'loss', 'content': 0.14494958519935608, 'timestamp': '2025-10-01 04:33:55.656250', 'step': 12478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:55.710905', 'step': 12478, 'epoch': 2} {'type': 'loss', 'content': 0.09936574101448059, 'timestamp': '2025-10-01 04:33:55.713055', 'step': 12479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:55.766856', 'step': 12479, 'epoch': 2} {'type': 'loss', 'content': 0.13966703414916992, 'timestamp': '2025-10-01 04:33:55.773114', 'step': 12480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:55.830584', 'step': 12480, 'epoch': 2} {'type': 'loss', 'content': 0.05087150260806084, 'timestamp': '2025-10-01 04:33:55.832984', 'step': 12481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:55.886935', 'step': 12481, 'epoch': 2} {'type': 'loss', 'content': 0.1151978075504303, 'timestamp': '2025-10-01 04:33:55.889073', 'step': 12482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:55.943061', 'step': 12482, 'epoch': 2} {'type': 'loss', 'content': 0.09024423360824585, 'timestamp': '2025-10-01 04:33:55.945211', 'step': 12483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:55.999359', 'step': 12483, 'epoch': 2} {'type': 'loss', 'content': 0.13173604011535645, 'timestamp': '2025-10-01 04:33:56.005488', 'step': 12484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:56.059899', 'step': 12484, 'epoch': 2} {'type': 'loss', 'content': 0.1132335513830185, 'timestamp': '2025-10-01 04:33:56.062065', 'step': 12485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:33:56.118783', 'step': 12485, 'epoch': 2} {'type': 'loss', 'content': 0.14648397266864777, 'timestamp': '2025-10-01 04:33:56.120982', 'step': 12486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:56.178415', 'step': 12486, 'epoch': 2} {'type': 'loss', 'content': 0.19499805569648743, 'timestamp': '2025-10-01 04:33:56.189477', 'step': 12487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:56.247356', 'step': 12487, 'epoch': 2} {'type': 'loss', 'content': 0.13045094907283783, 'timestamp': '2025-10-01 04:33:56.255351', 'step': 12488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:56.313571', 'step': 12488, 'epoch': 2} {'type': 'loss', 'content': 0.1518498808145523, 'timestamp': '2025-10-01 04:33:56.315903', 'step': 12489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:56.375465', 'step': 12489, 'epoch': 2} {'type': 'loss', 'content': 0.11539459228515625, 'timestamp': '2025-10-01 04:33:56.377842', 'step': 12490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:56.438640', 'step': 12490, 'epoch': 2} {'type': 'loss', 'content': 0.10321503132581711, 'timestamp': '2025-10-01 04:33:56.440978', 'step': 12491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:56.510821', 'step': 12491, 'epoch': 2} {'type': 'loss', 'content': 0.12747465074062347, 'timestamp': '2025-10-01 04:33:56.517995', 'step': 12492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:56.578426', 'step': 12492, 'epoch': 2} {'type': 'loss', 'content': 0.1733005791902542, 'timestamp': '2025-10-01 04:33:56.581311', 'step': 12493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:56.649977', 'step': 12493, 'epoch': 2} {'type': 'loss', 'content': 0.12491276860237122, 'timestamp': '2025-10-01 04:33:56.653185', 'step': 12494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:56.715203', 'step': 12494, 'epoch': 2} {'type': 'loss', 'content': 0.07215875387191772, 'timestamp': '2025-10-01 04:33:56.717561', 'step': 12495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:56.782190', 'step': 12495, 'epoch': 2} {'type': 'loss', 'content': 0.2120872288942337, 'timestamp': '2025-10-01 04:33:56.795552', 'step': 12496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:56.857822', 'step': 12496, 'epoch': 2} {'type': 'loss', 'content': 0.1140473261475563, 'timestamp': '2025-10-01 04:33:56.860675', 'step': 12497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:56.924613', 'step': 12497, 'epoch': 2} {'type': 'loss', 'content': 0.13477353751659393, 'timestamp': '2025-10-01 04:33:56.928031', 'step': 12498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:56.998719', 'step': 12498, 'epoch': 2} {'type': 'loss', 'content': 0.21647755801677704, 'timestamp': '2025-10-01 04:33:57.001021', 'step': 12499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:57.066879', 'step': 12499, 'epoch': 2} {'type': 'loss', 'content': 0.16580818593502045, 'timestamp': '2025-10-01 04:33:57.074023', 'step': 12500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 12500', 'timestamp': '2025-10-01 04:33:57.446496', 'step': 12500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:57.502170', 'step': 12500, 'epoch': 2} {'type': 'loss', 'content': 0.1365053951740265, 'timestamp': '2025-10-01 04:33:57.511046', 'step': 12501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:57.581855', 'step': 12501, 'epoch': 2} {'type': 'loss', 'content': 0.13719087839126587, 'timestamp': '2025-10-01 04:33:57.584172', 'step': 12502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:57.642355', 'step': 12502, 'epoch': 2} {'type': 'loss', 'content': 0.15466363728046417, 'timestamp': '2025-10-01 04:33:57.646000', 'step': 12503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:57.699884', 'step': 12503, 'epoch': 2} {'type': 'loss', 'content': 0.1717367172241211, 'timestamp': '2025-10-01 04:33:57.705996', 'step': 12504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:57.759486', 'step': 12504, 'epoch': 2} {'type': 'loss', 'content': 0.14545190334320068, 'timestamp': '2025-10-01 04:33:57.761670', 'step': 12505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:33:57.819617', 'step': 12505, 'epoch': 2} {'type': 'loss', 'content': 0.11094153672456741, 'timestamp': '2025-10-01 04:33:57.822655', 'step': 12506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:57.877097', 'step': 12506, 'epoch': 2} {'type': 'loss', 'content': 0.1440454125404358, 'timestamp': '2025-10-01 04:33:57.879666', 'step': 12507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:57.934068', 'step': 12507, 'epoch': 2} {'type': 'loss', 'content': 0.1119987815618515, 'timestamp': '2025-10-01 04:33:57.946235', 'step': 12508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:33:58.001118', 'step': 12508, 'epoch': 2} {'type': 'loss', 'content': 0.09581451863050461, 'timestamp': '2025-10-01 04:33:58.003538', 'step': 12509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:58.058291', 'step': 12509, 'epoch': 2} {'type': 'loss', 'content': 0.11335594207048416, 'timestamp': '2025-10-01 04:33:58.060899', 'step': 12510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:58.118431', 'step': 12510, 'epoch': 2} {'type': 'loss', 'content': 0.1525937020778656, 'timestamp': '2025-10-01 04:33:58.120504', 'step': 12511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:58.174237', 'step': 12511, 'epoch': 2} {'type': 'loss', 'content': 0.12348459661006927, 'timestamp': '2025-10-01 04:33:58.186267', 'step': 12512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:58.246604', 'step': 12512, 'epoch': 2} {'type': 'loss', 'content': 0.12287610024213791, 'timestamp': '2025-10-01 04:33:58.248778', 'step': 12513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:58.302388', 'step': 12513, 'epoch': 2} {'type': 'loss', 'content': 0.11984454095363617, 'timestamp': '2025-10-01 04:33:58.304457', 'step': 12514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:58.358797', 'step': 12514, 'epoch': 2} {'type': 'loss', 'content': 0.15473490953445435, 'timestamp': '2025-10-01 04:33:58.361113', 'step': 12515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:58.432857', 'step': 12515, 'epoch': 2} {'type': 'loss', 'content': 0.07767601311206818, 'timestamp': '2025-10-01 04:33:58.438782', 'step': 12516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:33:58.492026', 'step': 12516, 'epoch': 2} {'type': 'loss', 'content': 0.24738600850105286, 'timestamp': '2025-10-01 04:33:58.494288', 'step': 12517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:58.548251', 'step': 12517, 'epoch': 2} {'type': 'loss', 'content': 0.0781923308968544, 'timestamp': '2025-10-01 04:33:58.550536', 'step': 12518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:33:58.604401', 'step': 12518, 'epoch': 2} {'type': 'loss', 'content': 0.10153046995401382, 'timestamp': '2025-10-01 04:33:58.606673', 'step': 12519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:58.661895', 'step': 12519, 'epoch': 2} {'type': 'loss', 'content': 0.06618784368038177, 'timestamp': '2025-10-01 04:33:58.670326', 'step': 12520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:58.724033', 'step': 12520, 'epoch': 2} {'type': 'loss', 'content': 0.07273098081350327, 'timestamp': '2025-10-01 04:33:58.726169', 'step': 12521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:58.781726', 'step': 12521, 'epoch': 2} {'type': 'loss', 'content': 0.09468318521976471, 'timestamp': '2025-10-01 04:33:58.784239', 'step': 12522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:58.838932', 'step': 12522, 'epoch': 2} {'type': 'loss', 'content': 0.09492453932762146, 'timestamp': '2025-10-01 04:33:58.841251', 'step': 12523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:58.898350', 'step': 12523, 'epoch': 2} {'type': 'loss', 'content': 0.20121140778064728, 'timestamp': '2025-10-01 04:33:58.904052', 'step': 12524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:58.957444', 'step': 12524, 'epoch': 2} {'type': 'loss', 'content': 0.12576203048229218, 'timestamp': '2025-10-01 04:33:58.960329', 'step': 12525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:59.017661', 'step': 12525, 'epoch': 2} {'type': 'loss', 'content': 0.1894151270389557, 'timestamp': '2025-10-01 04:33:59.019862', 'step': 12526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:59.078947', 'step': 12526, 'epoch': 2} {'type': 'loss', 'content': 0.14343015849590302, 'timestamp': '2025-10-01 04:33:59.081048', 'step': 12527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:59.134798', 'step': 12527, 'epoch': 2} {'type': 'loss', 'content': 0.07091372460126877, 'timestamp': '2025-10-01 04:33:59.140555', 'step': 12528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:33:59.193843', 'step': 12528, 'epoch': 2} {'type': 'loss', 'content': 0.15330833196640015, 'timestamp': '2025-10-01 04:33:59.195972', 'step': 12529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:59.250489', 'step': 12529, 'epoch': 2} {'type': 'loss', 'content': 0.16268235445022583, 'timestamp': '2025-10-01 04:33:59.252792', 'step': 12530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:59.307433', 'step': 12530, 'epoch': 2} {'type': 'loss', 'content': 0.14744916558265686, 'timestamp': '2025-10-01 04:33:59.310336', 'step': 12531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:59.366067', 'step': 12531, 'epoch': 2} {'type': 'loss', 'content': 0.03199736401438713, 'timestamp': '2025-10-01 04:33:59.372098', 'step': 12532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:59.425173', 'step': 12532, 'epoch': 2} {'type': 'loss', 'content': 0.09158504754304886, 'timestamp': '2025-10-01 04:33:59.427340', 'step': 12533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:59.481200', 'step': 12533, 'epoch': 2} {'type': 'loss', 'content': 0.13353270292282104, 'timestamp': '2025-10-01 04:33:59.484277', 'step': 12534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:59.537854', 'step': 12534, 'epoch': 2} {'type': 'loss', 'content': 0.10786699503660202, 'timestamp': '2025-10-01 04:33:59.540067', 'step': 12535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:59.596585', 'step': 12535, 'epoch': 2} {'type': 'loss', 'content': 0.1623070240020752, 'timestamp': '2025-10-01 04:33:59.602365', 'step': 12536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:59.655883', 'step': 12536, 'epoch': 2} {'type': 'loss', 'content': 0.09242840111255646, 'timestamp': '2025-10-01 04:33:59.658217', 'step': 12537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:59.711876', 'step': 12537, 'epoch': 2} {'type': 'loss', 'content': 0.12319879233837128, 'timestamp': '2025-10-01 04:33:59.715457', 'step': 12538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:33:59.780169', 'step': 12538, 'epoch': 2} {'type': 'loss', 'content': 0.1244918629527092, 'timestamp': '2025-10-01 04:33:59.782553', 'step': 12539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:33:59.838100', 'step': 12539, 'epoch': 2} {'type': 'loss', 'content': 0.07187089323997498, 'timestamp': '2025-10-01 04:33:59.843858', 'step': 12540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:33:59.898450', 'step': 12540, 'epoch': 2} {'type': 'loss', 'content': 0.1409057378768921, 'timestamp': '2025-10-01 04:33:59.900652', 'step': 12541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:33:59.960160', 'step': 12541, 'epoch': 2} {'type': 'loss', 'content': 0.20505455136299133, 'timestamp': '2025-10-01 04:33:59.964061', 'step': 12542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:34:00.022753', 'step': 12542, 'epoch': 2} {'type': 'loss', 'content': 0.09081240743398666, 'timestamp': '2025-10-01 04:34:00.028884', 'step': 12543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:00.091286', 'step': 12543, 'epoch': 2} {'type': 'loss', 'content': 0.07158532738685608, 'timestamp': '2025-10-01 04:34:00.097405', 'step': 12544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:00.152848', 'step': 12544, 'epoch': 2} {'type': 'loss', 'content': 0.10041561722755432, 'timestamp': '2025-10-01 04:34:00.155104', 'step': 12545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:00.223380', 'step': 12545, 'epoch': 2} {'type': 'loss', 'content': 0.09399405121803284, 'timestamp': '2025-10-01 04:34:00.225652', 'step': 12546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:00.280390', 'step': 12546, 'epoch': 2} {'type': 'loss', 'content': 0.19007407128810883, 'timestamp': '2025-10-01 04:34:00.282607', 'step': 12547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:00.340866', 'step': 12547, 'epoch': 2} {'type': 'loss', 'content': 0.09706563502550125, 'timestamp': '2025-10-01 04:34:00.346876', 'step': 12548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:00.405880', 'step': 12548, 'epoch': 2} {'type': 'loss', 'content': 0.07863973826169968, 'timestamp': '2025-10-01 04:34:00.408049', 'step': 12549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:00.463646', 'step': 12549, 'epoch': 2} {'type': 'loss', 'content': 0.20832088589668274, 'timestamp': '2025-10-01 04:34:00.467876', 'step': 12550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:00.521937', 'step': 12550, 'epoch': 2} {'type': 'loss', 'content': 0.19335618615150452, 'timestamp': '2025-10-01 04:34:00.524090', 'step': 12551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:00.578177', 'step': 12551, 'epoch': 2} {'type': 'loss', 'content': 0.21969042718410492, 'timestamp': '2025-10-01 04:34:00.583748', 'step': 12552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:00.637007', 'step': 12552, 'epoch': 2} {'type': 'loss', 'content': 0.060739245265722275, 'timestamp': '2025-10-01 04:34:00.641020', 'step': 12553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:00.710396', 'step': 12553, 'epoch': 2} {'type': 'loss', 'content': 0.12203218042850494, 'timestamp': '2025-10-01 04:34:00.712686', 'step': 12554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:00.766377', 'step': 12554, 'epoch': 2} {'type': 'loss', 'content': 0.1830281764268875, 'timestamp': '2025-10-01 04:34:00.768566', 'step': 12555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:00.824247', 'step': 12555, 'epoch': 2} {'type': 'loss', 'content': 0.1892515867948532, 'timestamp': '2025-10-01 04:34:00.830876', 'step': 12556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:00.884004', 'step': 12556, 'epoch': 2} {'type': 'loss', 'content': 0.1004304513335228, 'timestamp': '2025-10-01 04:34:00.886033', 'step': 12557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:00.939597', 'step': 12557, 'epoch': 2} {'type': 'loss', 'content': 0.09403315931558609, 'timestamp': '2025-10-01 04:34:00.941634', 'step': 12558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:00.995063', 'step': 12558, 'epoch': 2} {'type': 'loss', 'content': 0.14751122891902924, 'timestamp': '2025-10-01 04:34:00.997176', 'step': 12559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:34:01.055496', 'step': 12559, 'epoch': 2} {'type': 'loss', 'content': 0.13271313905715942, 'timestamp': '2025-10-01 04:34:01.064123', 'step': 12560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:34:01.123711', 'step': 12560, 'epoch': 2} {'type': 'loss', 'content': 0.07848331332206726, 'timestamp': '2025-10-01 04:34:01.126355', 'step': 12561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:01.193964', 'step': 12561, 'epoch': 2} {'type': 'loss', 'content': 0.18319502472877502, 'timestamp': '2025-10-01 04:34:01.200759', 'step': 12562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:01.287016', 'step': 12562, 'epoch': 2} {'type': 'loss', 'content': 0.23350998759269714, 'timestamp': '2025-10-01 04:34:01.299536', 'step': 12563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:01.395289', 'step': 12563, 'epoch': 2} {'type': 'loss', 'content': 0.16762511432170868, 'timestamp': '2025-10-01 04:34:01.418381', 'step': 12564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:01.501351', 'step': 12564, 'epoch': 2} {'type': 'loss', 'content': 0.14099663496017456, 'timestamp': '2025-10-01 04:34:01.516324', 'step': 12565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:01.607765', 'step': 12565, 'epoch': 2} {'type': 'loss', 'content': 0.08414241671562195, 'timestamp': '2025-10-01 04:34:01.621044', 'step': 12566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:01.698138', 'step': 12566, 'epoch': 2} {'type': 'loss', 'content': 0.11934122443199158, 'timestamp': '2025-10-01 04:34:01.702848', 'step': 12567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:01.783505', 'step': 12567, 'epoch': 2} {'type': 'loss', 'content': 0.10757472366094589, 'timestamp': '2025-10-01 04:34:01.799548', 'step': 12568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:01.870297', 'step': 12568, 'epoch': 2} {'type': 'loss', 'content': 0.20422546565532684, 'timestamp': '2025-10-01 04:34:01.876564', 'step': 12569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:01.962801', 'step': 12569, 'epoch': 2} {'type': 'loss', 'content': 0.20094244182109833, 'timestamp': '2025-10-01 04:34:01.971560', 'step': 12570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:02.057618', 'step': 12570, 'epoch': 2} {'type': 'loss', 'content': 0.13464361429214478, 'timestamp': '2025-10-01 04:34:02.061578', 'step': 12571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:02.152166', 'step': 12571, 'epoch': 2} {'type': 'loss', 'content': 0.09583859145641327, 'timestamp': '2025-10-01 04:34:02.165906', 'step': 12572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:02.245992', 'step': 12572, 'epoch': 2} {'type': 'loss', 'content': 0.12751580774784088, 'timestamp': '2025-10-01 04:34:02.267682', 'step': 12573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:34:02.349244', 'step': 12573, 'epoch': 2} {'type': 'loss', 'content': 0.20144754648208618, 'timestamp': '2025-10-01 04:34:02.357417', 'step': 12574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:34:02.416285', 'step': 12574, 'epoch': 2} {'type': 'loss', 'content': 0.10073154419660568, 'timestamp': '2025-10-01 04:34:02.418578', 'step': 12575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:02.480056', 'step': 12575, 'epoch': 2} {'type': 'loss', 'content': 0.15089210867881775, 'timestamp': '2025-10-01 04:34:02.485901', 'step': 12576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:34:02.541147', 'step': 12576, 'epoch': 2} {'type': 'loss', 'content': 0.10912846028804779, 'timestamp': '2025-10-01 04:34:02.543200', 'step': 12577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:02.596389', 'step': 12577, 'epoch': 2} {'type': 'loss', 'content': 0.1818346530199051, 'timestamp': '2025-10-01 04:34:02.598582', 'step': 12578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:02.652141', 'step': 12578, 'epoch': 2} {'type': 'loss', 'content': 0.141945481300354, 'timestamp': '2025-10-01 04:34:02.654131', 'step': 12579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:02.707258', 'step': 12579, 'epoch': 2} {'type': 'loss', 'content': 0.18374426662921906, 'timestamp': '2025-10-01 04:34:02.713222', 'step': 12580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:02.766249', 'step': 12580, 'epoch': 2} {'type': 'loss', 'content': 0.11823397129774094, 'timestamp': '2025-10-01 04:34:02.768173', 'step': 12581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:02.822192', 'step': 12581, 'epoch': 2} {'type': 'loss', 'content': 0.176955446600914, 'timestamp': '2025-10-01 04:34:02.824372', 'step': 12582, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-01 04:34:16.056869', 'step': 12582, 'epoch': 2} {'type': 'pplx', 'content': 13587.58468925228, 'timestamp': '2025-10-01 04:34:16.060201', 'step': 12582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:16.115131', 'step': 12582, 'epoch': 2} {'type': 'loss', 'content': 0.10157923400402069, 'timestamp': '2025-10-01 04:34:16.117563', 'step': 12583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:16.172887', 'step': 12583, 'epoch': 2} {'type': 'loss', 'content': 0.09468390047550201, 'timestamp': '2025-10-01 04:34:16.183449', 'step': 12584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:16.237461', 'step': 12584, 'epoch': 2} {'type': 'loss', 'content': 0.0940263494849205, 'timestamp': '2025-10-01 04:34:16.239651', 'step': 12585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:34:16.293792', 'step': 12585, 'epoch': 2} {'type': 'loss', 'content': 0.12293580174446106, 'timestamp': '2025-10-01 04:34:16.296905', 'step': 12586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:16.350801', 'step': 12586, 'epoch': 2} {'type': 'loss', 'content': 0.08199035376310349, 'timestamp': '2025-10-01 04:34:16.352956', 'step': 12587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:16.406236', 'step': 12587, 'epoch': 2} {'type': 'loss', 'content': 0.190977081656456, 'timestamp': '2025-10-01 04:34:16.412202', 'step': 12588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:16.467213', 'step': 12588, 'epoch': 2} {'type': 'loss', 'content': 0.0827590674161911, 'timestamp': '2025-10-01 04:34:16.469324', 'step': 12589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:34:16.522846', 'step': 12589, 'epoch': 2} {'type': 'loss', 'content': 0.1237914115190506, 'timestamp': '2025-10-01 04:34:16.524792', 'step': 12590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:16.577582', 'step': 12590, 'epoch': 2} {'type': 'loss', 'content': 0.05385938659310341, 'timestamp': '2025-10-01 04:34:16.580115', 'step': 12591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:16.637682', 'step': 12591, 'epoch': 2} {'type': 'loss', 'content': 0.06699429452419281, 'timestamp': '2025-10-01 04:34:16.643544', 'step': 12592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:16.696091', 'step': 12592, 'epoch': 2} {'type': 'loss', 'content': 0.1287626177072525, 'timestamp': '2025-10-01 04:34:16.698049', 'step': 12593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:16.753687', 'step': 12593, 'epoch': 2} {'type': 'loss', 'content': 0.12087000161409378, 'timestamp': '2025-10-01 04:34:16.755874', 'step': 12594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:16.811857', 'step': 12594, 'epoch': 2} {'type': 'loss', 'content': 0.12824100255966187, 'timestamp': '2025-10-01 04:34:16.813871', 'step': 12595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:16.867368', 'step': 12595, 'epoch': 2} {'type': 'loss', 'content': 0.14433827996253967, 'timestamp': '2025-10-01 04:34:16.872876', 'step': 12596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:16.925701', 'step': 12596, 'epoch': 2} {'type': 'loss', 'content': 0.09990494698286057, 'timestamp': '2025-10-01 04:34:16.927709', 'step': 12597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:16.981739', 'step': 12597, 'epoch': 2} {'type': 'loss', 'content': 0.18988709151744843, 'timestamp': '2025-10-01 04:34:16.984246', 'step': 12598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:17.037969', 'step': 12598, 'epoch': 2} {'type': 'loss', 'content': 0.18212555348873138, 'timestamp': '2025-10-01 04:34:17.040090', 'step': 12599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:34:17.102284', 'step': 12599, 'epoch': 2} {'type': 'loss', 'content': 0.1087721586227417, 'timestamp': '2025-10-01 04:34:17.108311', 'step': 12600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:17.160870', 'step': 12600, 'epoch': 2} {'type': 'loss', 'content': 0.1393163502216339, 'timestamp': '2025-10-01 04:34:17.162981', 'step': 12601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:17.221660', 'step': 12601, 'epoch': 2} {'type': 'loss', 'content': 0.10540995001792908, 'timestamp': '2025-10-01 04:34:17.223728', 'step': 12602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:17.277236', 'step': 12602, 'epoch': 2} {'type': 'loss', 'content': 0.06860794872045517, 'timestamp': '2025-10-01 04:34:17.279849', 'step': 12603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:17.333121', 'step': 12603, 'epoch': 2} {'type': 'loss', 'content': 0.09833312034606934, 'timestamp': '2025-10-01 04:34:17.339487', 'step': 12604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:17.392233', 'step': 12604, 'epoch': 2} {'type': 'loss', 'content': 0.09221486747264862, 'timestamp': '2025-10-01 04:34:17.394597', 'step': 12605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:17.448887', 'step': 12605, 'epoch': 2} {'type': 'loss', 'content': 0.11079095304012299, 'timestamp': '2025-10-01 04:34:17.451547', 'step': 12606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:17.505618', 'step': 12606, 'epoch': 2} {'type': 'loss', 'content': 0.08606714755296707, 'timestamp': '2025-10-01 04:34:17.507902', 'step': 12607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:17.561823', 'step': 12607, 'epoch': 2} {'type': 'loss', 'content': 0.1656748354434967, 'timestamp': '2025-10-01 04:34:17.567966', 'step': 12608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:17.620702', 'step': 12608, 'epoch': 2} {'type': 'loss', 'content': 0.015057696960866451, 'timestamp': '2025-10-01 04:34:17.629792', 'step': 12609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:17.692212', 'step': 12609, 'epoch': 2} {'type': 'loss', 'content': 0.1523509919643402, 'timestamp': '2025-10-01 04:34:17.694716', 'step': 12610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:34:17.749927', 'step': 12610, 'epoch': 2} {'type': 'loss', 'content': 0.06503796577453613, 'timestamp': '2025-10-01 04:34:17.753490', 'step': 12611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:34:17.807615', 'step': 12611, 'epoch': 2} {'type': 'loss', 'content': 0.10846579819917679, 'timestamp': '2025-10-01 04:34:17.813589', 'step': 12612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:34:17.867343', 'step': 12612, 'epoch': 2} {'type': 'loss', 'content': 0.13391493260860443, 'timestamp': '2025-10-01 04:34:17.869557', 'step': 12613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:17.923657', 'step': 12613, 'epoch': 2} {'type': 'loss', 'content': 0.16389013826847076, 'timestamp': '2025-10-01 04:34:17.925561', 'step': 12614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:34:17.979081', 'step': 12614, 'epoch': 2} {'type': 'loss', 'content': 0.12313839793205261, 'timestamp': '2025-10-01 04:34:17.982706', 'step': 12615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:18.036620', 'step': 12615, 'epoch': 2} {'type': 'loss', 'content': 0.08614502847194672, 'timestamp': '2025-10-01 04:34:18.042089', 'step': 12616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:18.095403', 'step': 12616, 'epoch': 2} {'type': 'loss', 'content': 0.10196822881698608, 'timestamp': '2025-10-01 04:34:18.106563', 'step': 12617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:18.159849', 'step': 12617, 'epoch': 2} {'type': 'loss', 'content': 0.11491759121417999, 'timestamp': '2025-10-01 04:34:18.178735', 'step': 12618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:18.253116', 'step': 12618, 'epoch': 2} {'type': 'loss', 'content': 0.10537759959697723, 'timestamp': '2025-10-01 04:34:18.255243', 'step': 12619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:18.308461', 'step': 12619, 'epoch': 2} {'type': 'loss', 'content': 0.1988372802734375, 'timestamp': '2025-10-01 04:34:18.314603', 'step': 12620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:18.367691', 'step': 12620, 'epoch': 2} {'type': 'loss', 'content': 0.06232712045311928, 'timestamp': '2025-10-01 04:34:18.370027', 'step': 12621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:18.422994', 'step': 12621, 'epoch': 2} {'type': 'loss', 'content': 0.11972160637378693, 'timestamp': '2025-10-01 04:34:18.425445', 'step': 12622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:34:18.488323', 'step': 12622, 'epoch': 2} {'type': 'loss', 'content': 0.17808161675930023, 'timestamp': '2025-10-01 04:34:18.490532', 'step': 12623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:18.544884', 'step': 12623, 'epoch': 2} {'type': 'loss', 'content': 0.17356707155704498, 'timestamp': '2025-10-01 04:34:18.550875', 'step': 12624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:34:18.609336', 'step': 12624, 'epoch': 2} {'type': 'loss', 'content': 0.1625238060951233, 'timestamp': '2025-10-01 04:34:18.612924', 'step': 12625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:18.670246', 'step': 12625, 'epoch': 2} {'type': 'loss', 'content': 0.09329843521118164, 'timestamp': '2025-10-01 04:34:18.672833', 'step': 12626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:18.726807', 'step': 12626, 'epoch': 2} {'type': 'loss', 'content': 0.17347373068332672, 'timestamp': '2025-10-01 04:34:18.729596', 'step': 12627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:18.788996', 'step': 12627, 'epoch': 2} {'type': 'loss', 'content': 0.09259224683046341, 'timestamp': '2025-10-01 04:34:18.796388', 'step': 12628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:34:18.850432', 'step': 12628, 'epoch': 2} {'type': 'loss', 'content': 0.19032637774944305, 'timestamp': '2025-10-01 04:34:18.854056', 'step': 12629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:18.907918', 'step': 12629, 'epoch': 2} {'type': 'loss', 'content': 0.12002945691347122, 'timestamp': '2025-10-01 04:34:18.910025', 'step': 12630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:18.963173', 'step': 12630, 'epoch': 2} {'type': 'loss', 'content': 0.1811893731355667, 'timestamp': '2025-10-01 04:34:18.967306', 'step': 12631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:19.021150', 'step': 12631, 'epoch': 2} {'type': 'loss', 'content': 0.17509207129478455, 'timestamp': '2025-10-01 04:34:19.026726', 'step': 12632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:19.079657', 'step': 12632, 'epoch': 2} {'type': 'loss', 'content': 0.04739430919289589, 'timestamp': '2025-10-01 04:34:19.081944', 'step': 12633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:19.135290', 'step': 12633, 'epoch': 2} {'type': 'loss', 'content': 0.11586897075176239, 'timestamp': '2025-10-01 04:34:19.139771', 'step': 12634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:34:19.194944', 'step': 12634, 'epoch': 2} {'type': 'loss', 'content': 0.1581205427646637, 'timestamp': '2025-10-01 04:34:19.197098', 'step': 12635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:19.255738', 'step': 12635, 'epoch': 2} {'type': 'loss', 'content': 0.056590549647808075, 'timestamp': '2025-10-01 04:34:19.266150', 'step': 12636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:34:19.321186', 'step': 12636, 'epoch': 2} {'type': 'loss', 'content': 0.09416532516479492, 'timestamp': '2025-10-01 04:34:19.323258', 'step': 12637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:34:19.376494', 'step': 12637, 'epoch': 2} {'type': 'loss', 'content': 0.06383193284273148, 'timestamp': '2025-10-01 04:34:19.378359', 'step': 12638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:19.431622', 'step': 12638, 'epoch': 2} {'type': 'loss', 'content': 0.10890434682369232, 'timestamp': '2025-10-01 04:34:19.433944', 'step': 12639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:19.487008', 'step': 12639, 'epoch': 2} {'type': 'loss', 'content': 0.1341392695903778, 'timestamp': '2025-10-01 04:34:19.492783', 'step': 12640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:19.545877', 'step': 12640, 'epoch': 2} {'type': 'loss', 'content': 0.11661624908447266, 'timestamp': '2025-10-01 04:34:19.549084', 'step': 12641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:19.603808', 'step': 12641, 'epoch': 2} {'type': 'loss', 'content': 0.1430847942829132, 'timestamp': '2025-10-01 04:34:19.606325', 'step': 12642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:19.659857', 'step': 12642, 'epoch': 2} {'type': 'loss', 'content': 0.10365881770849228, 'timestamp': '2025-10-01 04:34:19.661954', 'step': 12643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:19.716174', 'step': 12643, 'epoch': 2} {'type': 'loss', 'content': 0.14190450310707092, 'timestamp': '2025-10-01 04:34:19.721735', 'step': 12644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:19.777721', 'step': 12644, 'epoch': 2} {'type': 'loss', 'content': 0.170134499669075, 'timestamp': '2025-10-01 04:34:19.779694', 'step': 12645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:19.832835', 'step': 12645, 'epoch': 2} {'type': 'loss', 'content': 0.14545632898807526, 'timestamp': '2025-10-01 04:34:19.835057', 'step': 12646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:19.896663', 'step': 12646, 'epoch': 2} {'type': 'loss', 'content': 0.10247956961393356, 'timestamp': '2025-10-01 04:34:19.898981', 'step': 12647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:19.952031', 'step': 12647, 'epoch': 2} {'type': 'loss', 'content': 0.18927115201950073, 'timestamp': '2025-10-01 04:34:19.957488', 'step': 12648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:20.014420', 'step': 12648, 'epoch': 2} {'type': 'loss', 'content': 0.17297200858592987, 'timestamp': '2025-10-01 04:34:20.017078', 'step': 12649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:20.075176', 'step': 12649, 'epoch': 2} {'type': 'loss', 'content': 0.12681712210178375, 'timestamp': '2025-10-01 04:34:20.085597', 'step': 12650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:20.148310', 'step': 12650, 'epoch': 2} {'type': 'loss', 'content': 0.11433632671833038, 'timestamp': '2025-10-01 04:34:20.150334', 'step': 12651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:20.205238', 'step': 12651, 'epoch': 2} {'type': 'loss', 'content': 0.12134336680173874, 'timestamp': '2025-10-01 04:34:20.211039', 'step': 12652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:34:20.263773', 'step': 12652, 'epoch': 2} {'type': 'loss', 'content': 0.10756407678127289, 'timestamp': '2025-10-01 04:34:20.265869', 'step': 12653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:20.320325', 'step': 12653, 'epoch': 2} {'type': 'loss', 'content': 0.12786653637886047, 'timestamp': '2025-10-01 04:34:20.322530', 'step': 12654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:34:20.376258', 'step': 12654, 'epoch': 2} {'type': 'loss', 'content': 0.08702636510133743, 'timestamp': '2025-10-01 04:34:20.378486', 'step': 12655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:20.432266', 'step': 12655, 'epoch': 2} {'type': 'loss', 'content': 0.09913525730371475, 'timestamp': '2025-10-01 04:34:20.438153', 'step': 12656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:34:20.491841', 'step': 12656, 'epoch': 2} {'type': 'loss', 'content': 0.22690565884113312, 'timestamp': '2025-10-01 04:34:20.493951', 'step': 12657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:20.547505', 'step': 12657, 'epoch': 2} {'type': 'loss', 'content': 0.14848318696022034, 'timestamp': '2025-10-01 04:34:20.549520', 'step': 12658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:20.602563', 'step': 12658, 'epoch': 2} {'type': 'loss', 'content': 0.10550922155380249, 'timestamp': '2025-10-01 04:34:20.604937', 'step': 12659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:20.658477', 'step': 12659, 'epoch': 2} {'type': 'loss', 'content': 0.18350918591022491, 'timestamp': '2025-10-01 04:34:20.664013', 'step': 12660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:20.716458', 'step': 12660, 'epoch': 2} {'type': 'loss', 'content': 0.20393221080303192, 'timestamp': '2025-10-01 04:34:20.718505', 'step': 12661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:20.772214', 'step': 12661, 'epoch': 2} {'type': 'loss', 'content': 0.14557935297489166, 'timestamp': '2025-10-01 04:34:20.779535', 'step': 12662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:34:20.834131', 'step': 12662, 'epoch': 2} {'type': 'loss', 'content': 0.11655928939580917, 'timestamp': '2025-10-01 04:34:20.837082', 'step': 12663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:20.889945', 'step': 12663, 'epoch': 2} {'type': 'loss', 'content': 0.08597321063280106, 'timestamp': '2025-10-01 04:34:20.895846', 'step': 12664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:20.948861', 'step': 12664, 'epoch': 2} {'type': 'loss', 'content': 0.08940018713474274, 'timestamp': '2025-10-01 04:34:20.952442', 'step': 12665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:34:21.017775', 'step': 12665, 'epoch': 2} {'type': 'loss', 'content': 0.08880380541086197, 'timestamp': '2025-10-01 04:34:21.019878', 'step': 12666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:21.072743', 'step': 12666, 'epoch': 2} {'type': 'loss', 'content': 0.12101522088050842, 'timestamp': '2025-10-01 04:34:21.074740', 'step': 12667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:34:21.127727', 'step': 12667, 'epoch': 2} {'type': 'loss', 'content': 0.12645484507083893, 'timestamp': '2025-10-01 04:34:21.133487', 'step': 12668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:21.186417', 'step': 12668, 'epoch': 2} {'type': 'loss', 'content': 0.15652266144752502, 'timestamp': '2025-10-01 04:34:21.188970', 'step': 12669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:21.241984', 'step': 12669, 'epoch': 2} {'type': 'loss', 'content': 0.13619360327720642, 'timestamp': '2025-10-01 04:34:21.244262', 'step': 12670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:21.297634', 'step': 12670, 'epoch': 2} {'type': 'loss', 'content': 0.054697923362255096, 'timestamp': '2025-10-01 04:34:21.299813', 'step': 12671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:21.354298', 'step': 12671, 'epoch': 2} {'type': 'loss', 'content': 0.12080859392881393, 'timestamp': '2025-10-01 04:34:21.359969', 'step': 12672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:21.412603', 'step': 12672, 'epoch': 2} {'type': 'loss', 'content': 0.0751708522439003, 'timestamp': '2025-10-01 04:34:21.414466', 'step': 12673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:21.467342', 'step': 12673, 'epoch': 2} {'type': 'loss', 'content': 0.20812787115573883, 'timestamp': '2025-10-01 04:34:21.473454', 'step': 12674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:21.526476', 'step': 12674, 'epoch': 2} {'type': 'loss', 'content': 0.0912434309720993, 'timestamp': '2025-10-01 04:34:21.528515', 'step': 12675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:21.581820', 'step': 12675, 'epoch': 2} {'type': 'loss', 'content': 0.11616787314414978, 'timestamp': '2025-10-01 04:34:21.587554', 'step': 12676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:21.639930', 'step': 12676, 'epoch': 2} {'type': 'loss', 'content': 0.07741229981184006, 'timestamp': '2025-10-01 04:34:21.642894', 'step': 12677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:21.696090', 'step': 12677, 'epoch': 2} {'type': 'loss', 'content': 0.1346573680639267, 'timestamp': '2025-10-01 04:34:21.698290', 'step': 12678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:21.756771', 'step': 12678, 'epoch': 2} {'type': 'loss', 'content': 0.12252255529165268, 'timestamp': '2025-10-01 04:34:21.759101', 'step': 12679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:21.812614', 'step': 12679, 'epoch': 2} {'type': 'loss', 'content': 0.1368994116783142, 'timestamp': '2025-10-01 04:34:21.818484', 'step': 12680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:34:21.872014', 'step': 12680, 'epoch': 2} {'type': 'loss', 'content': 0.21996267139911652, 'timestamp': '2025-10-01 04:34:21.874537', 'step': 12681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:21.928105', 'step': 12681, 'epoch': 2} {'type': 'loss', 'content': 0.23014424741268158, 'timestamp': '2025-10-01 04:34:21.930230', 'step': 12682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:21.984110', 'step': 12682, 'epoch': 2} {'type': 'loss', 'content': 0.06386858969926834, 'timestamp': '2025-10-01 04:34:21.986940', 'step': 12683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:22.040413', 'step': 12683, 'epoch': 2} {'type': 'loss', 'content': 0.13736969232559204, 'timestamp': '2025-10-01 04:34:22.046398', 'step': 12684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:22.099196', 'step': 12684, 'epoch': 2} {'type': 'loss', 'content': 0.07049021869897842, 'timestamp': '2025-10-01 04:34:22.101193', 'step': 12685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:22.154105', 'step': 12685, 'epoch': 2} {'type': 'loss', 'content': 0.09449402987957001, 'timestamp': '2025-10-01 04:34:22.156435', 'step': 12686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:22.209577', 'step': 12686, 'epoch': 2} {'type': 'loss', 'content': 0.16419993340969086, 'timestamp': '2025-10-01 04:34:22.211783', 'step': 12687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:22.269007', 'step': 12687, 'epoch': 2} {'type': 'loss', 'content': 0.0675104632973671, 'timestamp': '2025-10-01 04:34:22.274857', 'step': 12688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:22.344322', 'step': 12688, 'epoch': 2} {'type': 'loss', 'content': 0.15139609575271606, 'timestamp': '2025-10-01 04:34:22.346406', 'step': 12689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:22.403080', 'step': 12689, 'epoch': 2} {'type': 'loss', 'content': 0.13074596226215363, 'timestamp': '2025-10-01 04:34:22.406003', 'step': 12690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:22.459481', 'step': 12690, 'epoch': 2} {'type': 'loss', 'content': 0.051284242421388626, 'timestamp': '2025-10-01 04:34:22.461725', 'step': 12691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:34:22.515261', 'step': 12691, 'epoch': 2} {'type': 'loss', 'content': 0.12144896388053894, 'timestamp': '2025-10-01 04:34:22.520933', 'step': 12692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:22.575022', 'step': 12692, 'epoch': 2} {'type': 'loss', 'content': 0.08139274269342422, 'timestamp': '2025-10-01 04:34:22.576959', 'step': 12693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:22.630017', 'step': 12693, 'epoch': 2} {'type': 'loss', 'content': 0.24754267930984497, 'timestamp': '2025-10-01 04:34:22.632129', 'step': 12694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:22.686367', 'step': 12694, 'epoch': 2} {'type': 'loss', 'content': 0.03974643722176552, 'timestamp': '2025-10-01 04:34:22.688516', 'step': 12695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:22.741565', 'step': 12695, 'epoch': 2} {'type': 'loss', 'content': 0.09823243319988251, 'timestamp': '2025-10-01 04:34:22.747226', 'step': 12696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:22.800505', 'step': 12696, 'epoch': 2} {'type': 'loss', 'content': 0.07720448076725006, 'timestamp': '2025-10-01 04:34:22.802833', 'step': 12697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:34:22.861640', 'step': 12697, 'epoch': 2} {'type': 'loss', 'content': 0.1938149482011795, 'timestamp': '2025-10-01 04:34:22.864006', 'step': 12698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:22.918120', 'step': 12698, 'epoch': 2} {'type': 'loss', 'content': 0.1048419401049614, 'timestamp': '2025-10-01 04:34:22.920145', 'step': 12699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:22.974069', 'step': 12699, 'epoch': 2} {'type': 'loss', 'content': 0.18719102442264557, 'timestamp': '2025-10-01 04:34:22.979734', 'step': 12700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:23.033019', 'step': 12700, 'epoch': 2} {'type': 'loss', 'content': 0.12114616483449936, 'timestamp': '2025-10-01 04:34:23.035011', 'step': 12701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:23.088339', 'step': 12701, 'epoch': 2} {'type': 'loss', 'content': 0.11076442152261734, 'timestamp': '2025-10-01 04:34:23.090397', 'step': 12702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:23.143891', 'step': 12702, 'epoch': 2} {'type': 'loss', 'content': 0.07622335851192474, 'timestamp': '2025-10-01 04:34:23.146110', 'step': 12703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:34:23.199390', 'step': 12703, 'epoch': 2} {'type': 'loss', 'content': 0.1261807382106781, 'timestamp': '2025-10-01 04:34:23.205184', 'step': 12704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:23.258897', 'step': 12704, 'epoch': 2} {'type': 'loss', 'content': 0.04816213995218277, 'timestamp': '2025-10-01 04:34:23.261103', 'step': 12705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:23.314443', 'step': 12705, 'epoch': 2} {'type': 'loss', 'content': 0.10396745800971985, 'timestamp': '2025-10-01 04:34:23.316687', 'step': 12706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:23.371689', 'step': 12706, 'epoch': 2} {'type': 'loss', 'content': 0.1495196670293808, 'timestamp': '2025-10-01 04:34:23.373927', 'step': 12707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:34:23.427373', 'step': 12707, 'epoch': 2} {'type': 'loss', 'content': 0.08790507167577744, 'timestamp': '2025-10-01 04:34:23.433078', 'step': 12708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:23.487542', 'step': 12708, 'epoch': 2} {'type': 'loss', 'content': 0.08955880999565125, 'timestamp': '2025-10-01 04:34:23.489603', 'step': 12709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:23.543102', 'step': 12709, 'epoch': 2} {'type': 'loss', 'content': 0.15558677911758423, 'timestamp': '2025-10-01 04:34:23.545012', 'step': 12710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:23.598307', 'step': 12710, 'epoch': 2} {'type': 'loss', 'content': 0.0928587019443512, 'timestamp': '2025-10-01 04:34:23.602517', 'step': 12711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:23.659238', 'step': 12711, 'epoch': 2} {'type': 'loss', 'content': 0.14655479788780212, 'timestamp': '2025-10-01 04:34:23.664917', 'step': 12712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:23.719891', 'step': 12712, 'epoch': 2} {'type': 'loss', 'content': 0.1586053967475891, 'timestamp': '2025-10-01 04:34:23.722177', 'step': 12713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:23.780444', 'step': 12713, 'epoch': 2} {'type': 'loss', 'content': 0.13945870101451874, 'timestamp': '2025-10-01 04:34:23.782483', 'step': 12714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:23.837531', 'step': 12714, 'epoch': 2} {'type': 'loss', 'content': 0.14694108068943024, 'timestamp': '2025-10-01 04:34:23.839773', 'step': 12715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:23.894431', 'step': 12715, 'epoch': 2} {'type': 'loss', 'content': 0.11767816543579102, 'timestamp': '2025-10-01 04:34:23.900207', 'step': 12716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:34:23.954875', 'step': 12716, 'epoch': 2} {'type': 'loss', 'content': 0.1427651345729828, 'timestamp': '2025-10-01 04:34:23.956919', 'step': 12717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:24.010302', 'step': 12717, 'epoch': 2} {'type': 'loss', 'content': 0.10623262822628021, 'timestamp': '2025-10-01 04:34:24.012400', 'step': 12718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:24.065992', 'step': 12718, 'epoch': 2} {'type': 'loss', 'content': 0.053454138338565826, 'timestamp': '2025-10-01 04:34:24.068663', 'step': 12719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:24.122832', 'step': 12719, 'epoch': 2} {'type': 'loss', 'content': 0.06570034474134445, 'timestamp': '2025-10-01 04:34:24.128695', 'step': 12720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:34:24.181732', 'step': 12720, 'epoch': 2} {'type': 'loss', 'content': 0.06860000640153885, 'timestamp': '2025-10-01 04:34:24.184196', 'step': 12721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:24.237673', 'step': 12721, 'epoch': 2} {'type': 'loss', 'content': 0.07556981593370438, 'timestamp': '2025-10-01 04:34:24.239954', 'step': 12722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:24.295119', 'step': 12722, 'epoch': 2} {'type': 'loss', 'content': 0.06802020221948624, 'timestamp': '2025-10-01 04:34:24.297142', 'step': 12723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:24.351181', 'step': 12723, 'epoch': 2} {'type': 'loss', 'content': 0.07639017701148987, 'timestamp': '2025-10-01 04:34:24.356879', 'step': 12724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:24.410405', 'step': 12724, 'epoch': 2} {'type': 'loss', 'content': 0.2062181979417801, 'timestamp': '2025-10-01 04:34:24.412501', 'step': 12725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:24.465534', 'step': 12725, 'epoch': 2} {'type': 'loss', 'content': 0.12664620578289032, 'timestamp': '2025-10-01 04:34:24.467841', 'step': 12726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:34:24.521461', 'step': 12726, 'epoch': 2} {'type': 'loss', 'content': 0.12154141068458557, 'timestamp': '2025-10-01 04:34:24.523754', 'step': 12727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:24.577010', 'step': 12727, 'epoch': 2} {'type': 'loss', 'content': 0.04766300320625305, 'timestamp': '2025-10-01 04:34:24.582437', 'step': 12728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:34:24.640882', 'step': 12728, 'epoch': 2} {'type': 'loss', 'content': 0.14610370993614197, 'timestamp': '2025-10-01 04:34:24.643022', 'step': 12729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:24.696173', 'step': 12729, 'epoch': 2} {'type': 'loss', 'content': 0.11487662047147751, 'timestamp': '2025-10-01 04:34:24.698306', 'step': 12730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:24.751661', 'step': 12730, 'epoch': 2} {'type': 'loss', 'content': 0.1253177672624588, 'timestamp': '2025-10-01 04:34:24.753823', 'step': 12731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:24.809708', 'step': 12731, 'epoch': 2} {'type': 'loss', 'content': 0.07005754113197327, 'timestamp': '2025-10-01 04:34:24.815494', 'step': 12732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:34:24.870577', 'step': 12732, 'epoch': 2} {'type': 'loss', 'content': 0.1672906130552292, 'timestamp': '2025-10-01 04:34:24.873048', 'step': 12733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:24.927308', 'step': 12733, 'epoch': 2} {'type': 'loss', 'content': 0.23020558059215546, 'timestamp': '2025-10-01 04:34:24.929936', 'step': 12734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:34:24.984174', 'step': 12734, 'epoch': 2} {'type': 'loss', 'content': 0.20430037379264832, 'timestamp': '2025-10-01 04:34:24.994608', 'step': 12735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:25.048578', 'step': 12735, 'epoch': 2} {'type': 'loss', 'content': 0.11627418547868729, 'timestamp': '2025-10-01 04:34:25.054408', 'step': 12736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:25.109173', 'step': 12736, 'epoch': 2} {'type': 'loss', 'content': 0.06976243108510971, 'timestamp': '2025-10-01 04:34:25.111772', 'step': 12737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:25.166911', 'step': 12737, 'epoch': 2} {'type': 'loss', 'content': 0.12437598407268524, 'timestamp': '2025-10-01 04:34:25.169371', 'step': 12738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:25.225275', 'step': 12738, 'epoch': 2} {'type': 'loss', 'content': 0.22464048862457275, 'timestamp': '2025-10-01 04:34:25.228010', 'step': 12739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:25.285605', 'step': 12739, 'epoch': 2} {'type': 'loss', 'content': 0.08574520796537399, 'timestamp': '2025-10-01 04:34:25.293801', 'step': 12740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:25.364981', 'step': 12740, 'epoch': 2} {'type': 'loss', 'content': 0.1742403656244278, 'timestamp': '2025-10-01 04:34:25.370166', 'step': 12741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:34:25.427071', 'step': 12741, 'epoch': 2} {'type': 'loss', 'content': 0.1853383630514145, 'timestamp': '2025-10-01 04:34:25.429946', 'step': 12742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:34:25.486222', 'step': 12742, 'epoch': 2} {'type': 'loss', 'content': 0.12218905240297318, 'timestamp': '2025-10-01 04:34:25.490015', 'step': 12743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:25.544812', 'step': 12743, 'epoch': 2} {'type': 'loss', 'content': 0.09991707652807236, 'timestamp': '2025-10-01 04:34:25.551273', 'step': 12744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:25.619851', 'step': 12744, 'epoch': 2} {'type': 'loss', 'content': 0.15104840695858002, 'timestamp': '2025-10-01 04:34:25.622912', 'step': 12745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:34:25.685220', 'step': 12745, 'epoch': 2} {'type': 'loss', 'content': 0.2337307631969452, 'timestamp': '2025-10-01 04:34:25.687516', 'step': 12746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:34:25.742996', 'step': 12746, 'epoch': 2} {'type': 'loss', 'content': 0.09158550202846527, 'timestamp': '2025-10-01 04:34:25.745113', 'step': 12747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:25.800760', 'step': 12747, 'epoch': 2} {'type': 'loss', 'content': 0.0878087729215622, 'timestamp': '2025-10-01 04:34:25.809385', 'step': 12748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:25.873018', 'step': 12748, 'epoch': 2} {'type': 'loss', 'content': 0.1764160841703415, 'timestamp': '2025-10-01 04:34:25.875715', 'step': 12749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:25.931582', 'step': 12749, 'epoch': 2} {'type': 'loss', 'content': 0.10080766677856445, 'timestamp': '2025-10-01 04:34:25.934059', 'step': 12750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:34:25.988135', 'step': 12750, 'epoch': 2} {'type': 'loss', 'content': 0.18845199048519135, 'timestamp': '2025-10-01 04:34:25.990391', 'step': 12751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:26.045229', 'step': 12751, 'epoch': 2} {'type': 'loss', 'content': 0.13466699421405792, 'timestamp': '2025-10-01 04:34:26.051470', 'step': 12752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:26.105027', 'step': 12752, 'epoch': 2} {'type': 'loss', 'content': 0.21675533056259155, 'timestamp': '2025-10-01 04:34:26.107134', 'step': 12753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:26.161206', 'step': 12753, 'epoch': 2} {'type': 'loss', 'content': 0.07338318973779678, 'timestamp': '2025-10-01 04:34:26.163737', 'step': 12754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:26.220275', 'step': 12754, 'epoch': 2} {'type': 'loss', 'content': 0.07715502381324768, 'timestamp': '2025-10-01 04:34:26.223048', 'step': 12755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:34:26.280235', 'step': 12755, 'epoch': 2} {'type': 'loss', 'content': 0.09824208915233612, 'timestamp': '2025-10-01 04:34:26.287665', 'step': 12756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:26.342988', 'step': 12756, 'epoch': 2} {'type': 'loss', 'content': 0.14962252974510193, 'timestamp': '2025-10-01 04:34:26.345829', 'step': 12757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:26.401148', 'step': 12757, 'epoch': 2} {'type': 'loss', 'content': 0.1575620025396347, 'timestamp': '2025-10-01 04:34:26.403102', 'step': 12758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:26.457192', 'step': 12758, 'epoch': 2} {'type': 'loss', 'content': 0.14307436347007751, 'timestamp': '2025-10-01 04:34:26.459660', 'step': 12759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:26.514120', 'step': 12759, 'epoch': 2} {'type': 'loss', 'content': 0.10995516926050186, 'timestamp': '2025-10-01 04:34:26.520690', 'step': 12760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:26.576057', 'step': 12760, 'epoch': 2} {'type': 'loss', 'content': 0.11040037870407104, 'timestamp': '2025-10-01 04:34:26.579306', 'step': 12761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:34:26.635192', 'step': 12761, 'epoch': 2} {'type': 'loss', 'content': 0.1174698993563652, 'timestamp': '2025-10-01 04:34:26.637762', 'step': 12762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:26.692645', 'step': 12762, 'epoch': 2} {'type': 'loss', 'content': 0.19454747438430786, 'timestamp': '2025-10-01 04:34:26.694695', 'step': 12763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:26.750881', 'step': 12763, 'epoch': 2} {'type': 'loss', 'content': 0.06050443649291992, 'timestamp': '2025-10-01 04:34:26.757676', 'step': 12764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:26.811643', 'step': 12764, 'epoch': 2} {'type': 'loss', 'content': 0.0677560567855835, 'timestamp': '2025-10-01 04:34:26.813977', 'step': 12765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:26.868184', 'step': 12765, 'epoch': 2} {'type': 'loss', 'content': 0.14112251996994019, 'timestamp': '2025-10-01 04:34:26.870354', 'step': 12766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:26.927962', 'step': 12766, 'epoch': 2} {'type': 'loss', 'content': 0.10859748721122742, 'timestamp': '2025-10-01 04:34:26.931328', 'step': 12767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:26.986271', 'step': 12767, 'epoch': 2} {'type': 'loss', 'content': 0.11823046207427979, 'timestamp': '2025-10-01 04:34:26.993135', 'step': 12768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:27.051886', 'step': 12768, 'epoch': 2} {'type': 'loss', 'content': 0.13118630647659302, 'timestamp': '2025-10-01 04:34:27.055118', 'step': 12769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:34:27.118094', 'step': 12769, 'epoch': 2} {'type': 'loss', 'content': 0.1273716390132904, 'timestamp': '2025-10-01 04:34:27.121150', 'step': 12770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:27.181741', 'step': 12770, 'epoch': 2} {'type': 'loss', 'content': 0.06171005591750145, 'timestamp': '2025-10-01 04:34:27.184101', 'step': 12771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:27.243109', 'step': 12771, 'epoch': 2} {'type': 'loss', 'content': 0.09161986410617828, 'timestamp': '2025-10-01 04:34:27.250592', 'step': 12772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:27.309729', 'step': 12772, 'epoch': 2} {'type': 'loss', 'content': 0.15907877683639526, 'timestamp': '2025-10-01 04:34:27.312668', 'step': 12773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:27.372228', 'step': 12773, 'epoch': 2} {'type': 'loss', 'content': 0.2395045906305313, 'timestamp': '2025-10-01 04:34:27.376189', 'step': 12774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:27.434812', 'step': 12774, 'epoch': 2} {'type': 'loss', 'content': 0.11778254806995392, 'timestamp': '2025-10-01 04:34:27.438711', 'step': 12775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:27.498914', 'step': 12775, 'epoch': 2} {'type': 'loss', 'content': 0.19193626940250397, 'timestamp': '2025-10-01 04:34:27.505492', 'step': 12776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:34:27.562961', 'step': 12776, 'epoch': 2} {'type': 'loss', 'content': 0.0839187353849411, 'timestamp': '2025-10-01 04:34:27.565158', 'step': 12777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:27.624337', 'step': 12777, 'epoch': 2} {'type': 'loss', 'content': 0.08537505567073822, 'timestamp': '2025-10-01 04:34:27.626653', 'step': 12778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:27.683765', 'step': 12778, 'epoch': 2} {'type': 'loss', 'content': 0.11436878144741058, 'timestamp': '2025-10-01 04:34:27.686873', 'step': 12779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:27.743495', 'step': 12779, 'epoch': 2} {'type': 'loss', 'content': 0.13623014092445374, 'timestamp': '2025-10-01 04:34:27.751577', 'step': 12780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:27.806003', 'step': 12780, 'epoch': 2} {'type': 'loss', 'content': 0.09578149765729904, 'timestamp': '2025-10-01 04:34:27.808202', 'step': 12781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:34:27.862030', 'step': 12781, 'epoch': 2} {'type': 'loss', 'content': 0.08500957489013672, 'timestamp': '2025-10-01 04:34:27.865266', 'step': 12782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:27.920199', 'step': 12782, 'epoch': 2} {'type': 'loss', 'content': 0.11770996451377869, 'timestamp': '2025-10-01 04:34:27.922828', 'step': 12783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:27.979164', 'step': 12783, 'epoch': 2} {'type': 'loss', 'content': 0.22177617251873016, 'timestamp': '2025-10-01 04:34:27.985427', 'step': 12784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:28.041486', 'step': 12784, 'epoch': 2} {'type': 'loss', 'content': 0.12522639334201813, 'timestamp': '2025-10-01 04:34:28.043786', 'step': 12785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:28.099403', 'step': 12785, 'epoch': 2} {'type': 'loss', 'content': 0.0760449469089508, 'timestamp': '2025-10-01 04:34:28.101379', 'step': 12786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:28.154532', 'step': 12786, 'epoch': 2} {'type': 'loss', 'content': 0.08367256075143814, 'timestamp': '2025-10-01 04:34:28.156819', 'step': 12787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:28.210034', 'step': 12787, 'epoch': 2} {'type': 'loss', 'content': 0.08540014922618866, 'timestamp': '2025-10-01 04:34:28.216415', 'step': 12788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:28.269651', 'step': 12788, 'epoch': 2} {'type': 'loss', 'content': 0.1659965068101883, 'timestamp': '2025-10-01 04:34:28.276960', 'step': 12789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:34:28.337074', 'step': 12789, 'epoch': 2} {'type': 'loss', 'content': 0.08337733149528503, 'timestamp': '2025-10-01 04:34:28.339188', 'step': 12790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:28.400709', 'step': 12790, 'epoch': 2} {'type': 'loss', 'content': 0.14049774408340454, 'timestamp': '2025-10-01 04:34:28.402978', 'step': 12791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:28.456044', 'step': 12791, 'epoch': 2} {'type': 'loss', 'content': 0.16256913542747498, 'timestamp': '2025-10-01 04:34:28.461993', 'step': 12792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:28.514751', 'step': 12792, 'epoch': 2} {'type': 'loss', 'content': 0.15173174440860748, 'timestamp': '2025-10-01 04:34:28.516983', 'step': 12793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:28.570574', 'step': 12793, 'epoch': 2} {'type': 'loss', 'content': 0.08292067050933838, 'timestamp': '2025-10-01 04:34:28.576675', 'step': 12794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:28.636278', 'step': 12794, 'epoch': 2} {'type': 'loss', 'content': 0.21079730987548828, 'timestamp': '2025-10-01 04:34:28.638535', 'step': 12795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:28.692357', 'step': 12795, 'epoch': 2} {'type': 'loss', 'content': 0.14698171615600586, 'timestamp': '2025-10-01 04:34:28.698093', 'step': 12796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:28.752229', 'step': 12796, 'epoch': 2} {'type': 'loss', 'content': 0.11187650263309479, 'timestamp': '2025-10-01 04:34:28.754697', 'step': 12797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:28.808348', 'step': 12797, 'epoch': 2} {'type': 'loss', 'content': 0.13248802721500397, 'timestamp': '2025-10-01 04:34:28.811184', 'step': 12798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:34:28.864595', 'step': 12798, 'epoch': 2} {'type': 'loss', 'content': 0.11941114068031311, 'timestamp': '2025-10-01 04:34:28.866928', 'step': 12799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:28.930586', 'step': 12799, 'epoch': 2} {'type': 'loss', 'content': 0.11395330727100372, 'timestamp': '2025-10-01 04:34:28.936397', 'step': 12800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:28.989500', 'step': 12800, 'epoch': 2} {'type': 'loss', 'content': 0.07654113322496414, 'timestamp': '2025-10-01 04:34:28.991472', 'step': 12801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:29.044998', 'step': 12801, 'epoch': 2} {'type': 'loss', 'content': 0.023871099576354027, 'timestamp': '2025-10-01 04:34:29.047215', 'step': 12802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:29.100808', 'step': 12802, 'epoch': 2} {'type': 'loss', 'content': 0.14373108744621277, 'timestamp': '2025-10-01 04:34:29.102785', 'step': 12803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:29.155692', 'step': 12803, 'epoch': 2} {'type': 'loss', 'content': 0.12671466171741486, 'timestamp': '2025-10-01 04:34:29.161639', 'step': 12804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:29.214751', 'step': 12804, 'epoch': 2} {'type': 'loss', 'content': 0.12848912179470062, 'timestamp': '2025-10-01 04:34:29.219433', 'step': 12805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:29.272840', 'step': 12805, 'epoch': 2} {'type': 'loss', 'content': 0.16624706983566284, 'timestamp': '2025-10-01 04:34:29.275477', 'step': 12806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:29.328287', 'step': 12806, 'epoch': 2} {'type': 'loss', 'content': 0.16475504636764526, 'timestamp': '2025-10-01 04:34:29.330474', 'step': 12807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:29.384506', 'step': 12807, 'epoch': 2} {'type': 'loss', 'content': 0.15588192641735077, 'timestamp': '2025-10-01 04:34:29.390328', 'step': 12808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:29.445100', 'step': 12808, 'epoch': 2} {'type': 'loss', 'content': 0.15428076684474945, 'timestamp': '2025-10-01 04:34:29.447003', 'step': 12809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:34:29.500254', 'step': 12809, 'epoch': 2} {'type': 'loss', 'content': 0.17066869139671326, 'timestamp': '2025-10-01 04:34:29.502397', 'step': 12810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:29.557108', 'step': 12810, 'epoch': 2} {'type': 'loss', 'content': 0.1493530124425888, 'timestamp': '2025-10-01 04:34:29.559539', 'step': 12811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:29.613413', 'step': 12811, 'epoch': 2} {'type': 'loss', 'content': 0.17647695541381836, 'timestamp': '2025-10-01 04:34:29.619523', 'step': 12812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:29.672201', 'step': 12812, 'epoch': 2} {'type': 'loss', 'content': 0.0849752202630043, 'timestamp': '2025-10-01 04:34:29.674685', 'step': 12813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:34:29.728736', 'step': 12813, 'epoch': 2} {'type': 'loss', 'content': 0.0763992965221405, 'timestamp': '2025-10-01 04:34:29.730960', 'step': 12814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:29.784430', 'step': 12814, 'epoch': 2} {'type': 'loss', 'content': 0.1460382491350174, 'timestamp': '2025-10-01 04:34:29.786569', 'step': 12815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:29.840432', 'step': 12815, 'epoch': 2} {'type': 'loss', 'content': 0.16476541757583618, 'timestamp': '2025-10-01 04:34:29.846071', 'step': 12816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:29.899155', 'step': 12816, 'epoch': 2} {'type': 'loss', 'content': 0.10838932543992996, 'timestamp': '2025-10-01 04:34:29.901132', 'step': 12817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:34:29.954070', 'step': 12817, 'epoch': 2} {'type': 'loss', 'content': 0.13925939798355103, 'timestamp': '2025-10-01 04:34:29.956336', 'step': 12818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:30.010475', 'step': 12818, 'epoch': 2} {'type': 'loss', 'content': 0.08168913424015045, 'timestamp': '2025-10-01 04:34:30.012553', 'step': 12819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:30.065056', 'step': 12819, 'epoch': 2} {'type': 'loss', 'content': 0.13630612194538116, 'timestamp': '2025-10-01 04:34:30.070919', 'step': 12820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:30.123380', 'step': 12820, 'epoch': 2} {'type': 'loss', 'content': 0.08198224753141403, 'timestamp': '2025-10-01 04:34:30.125476', 'step': 12821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:30.178484', 'step': 12821, 'epoch': 2} {'type': 'loss', 'content': 0.09417691826820374, 'timestamp': '2025-10-01 04:34:30.180492', 'step': 12822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:30.233728', 'step': 12822, 'epoch': 2} {'type': 'loss', 'content': 0.13262812793254852, 'timestamp': '2025-10-01 04:34:30.236381', 'step': 12823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:30.294795', 'step': 12823, 'epoch': 2} {'type': 'loss', 'content': 0.13085977733135223, 'timestamp': '2025-10-01 04:34:30.300485', 'step': 12824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:34:30.358301', 'step': 12824, 'epoch': 2} {'type': 'loss', 'content': 0.08345913887023926, 'timestamp': '2025-10-01 04:34:30.360182', 'step': 12825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:30.414373', 'step': 12825, 'epoch': 2} {'type': 'loss', 'content': 0.10890010744333267, 'timestamp': '2025-10-01 04:34:30.427747', 'step': 12826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:30.481287', 'step': 12826, 'epoch': 2} {'type': 'loss', 'content': 0.07143589854240417, 'timestamp': '2025-10-01 04:34:30.483445', 'step': 12827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:30.537058', 'step': 12827, 'epoch': 2} {'type': 'loss', 'content': 0.06249037757515907, 'timestamp': '2025-10-01 04:34:30.543365', 'step': 12828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:30.596000', 'step': 12828, 'epoch': 2} {'type': 'loss', 'content': 0.15619470179080963, 'timestamp': '2025-10-01 04:34:30.598064', 'step': 12829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:30.650890', 'step': 12829, 'epoch': 2} {'type': 'loss', 'content': 0.08231155574321747, 'timestamp': '2025-10-01 04:34:30.653094', 'step': 12830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:34:30.706266', 'step': 12830, 'epoch': 2} {'type': 'loss', 'content': 0.08220767229795456, 'timestamp': '2025-10-01 04:34:30.708429', 'step': 12831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:30.763695', 'step': 12831, 'epoch': 2} {'type': 'loss', 'content': 0.1812465786933899, 'timestamp': '2025-10-01 04:34:30.769385', 'step': 12832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:34:30.822752', 'step': 12832, 'epoch': 2} {'type': 'loss', 'content': 0.10997017472982407, 'timestamp': '2025-10-01 04:34:30.824764', 'step': 12833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:30.877788', 'step': 12833, 'epoch': 2} {'type': 'loss', 'content': 0.09161067754030228, 'timestamp': '2025-10-01 04:34:30.879976', 'step': 12834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:30.933254', 'step': 12834, 'epoch': 2} {'type': 'loss', 'content': 0.055908944457769394, 'timestamp': '2025-10-01 04:34:30.935464', 'step': 12835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:30.988619', 'step': 12835, 'epoch': 2} {'type': 'loss', 'content': 0.07938197255134583, 'timestamp': '2025-10-01 04:34:30.994366', 'step': 12836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:31.047104', 'step': 12836, 'epoch': 2} {'type': 'loss', 'content': 0.1064915731549263, 'timestamp': '2025-10-01 04:34:31.049238', 'step': 12837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:31.102599', 'step': 12837, 'epoch': 2} {'type': 'loss', 'content': 0.0788244977593422, 'timestamp': '2025-10-01 04:34:31.104789', 'step': 12838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:31.158426', 'step': 12838, 'epoch': 2} {'type': 'loss', 'content': 0.16107070446014404, 'timestamp': '2025-10-01 04:34:31.160558', 'step': 12839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:31.213795', 'step': 12839, 'epoch': 2} {'type': 'loss', 'content': 0.12768356502056122, 'timestamp': '2025-10-01 04:34:31.219523', 'step': 12840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:34:31.272252', 'step': 12840, 'epoch': 2} {'type': 'loss', 'content': 0.09346915036439896, 'timestamp': '2025-10-01 04:34:31.274507', 'step': 12841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:31.327998', 'step': 12841, 'epoch': 2} {'type': 'loss', 'content': 0.0897708460688591, 'timestamp': '2025-10-01 04:34:31.330240', 'step': 12842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:31.384821', 'step': 12842, 'epoch': 2} {'type': 'loss', 'content': 0.0632844865322113, 'timestamp': '2025-10-01 04:34:31.387051', 'step': 12843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:31.440570', 'step': 12843, 'epoch': 2} {'type': 'loss', 'content': 0.11834942549467087, 'timestamp': '2025-10-01 04:34:31.446203', 'step': 12844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:31.498520', 'step': 12844, 'epoch': 2} {'type': 'loss', 'content': 0.09125503152608871, 'timestamp': '2025-10-01 04:34:31.503944', 'step': 12845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:31.558190', 'step': 12845, 'epoch': 2} {'type': 'loss', 'content': 0.0650271400809288, 'timestamp': '2025-10-01 04:34:31.560296', 'step': 12846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:31.615883', 'step': 12846, 'epoch': 2} {'type': 'loss', 'content': 0.12593182921409607, 'timestamp': '2025-10-01 04:34:31.617953', 'step': 12847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:34:31.674130', 'step': 12847, 'epoch': 2} {'type': 'loss', 'content': 0.13591492176055908, 'timestamp': '2025-10-01 04:34:31.680444', 'step': 12848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:31.733206', 'step': 12848, 'epoch': 2} {'type': 'loss', 'content': 0.11676215380430222, 'timestamp': '2025-10-01 04:34:31.735708', 'step': 12849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:31.789163', 'step': 12849, 'epoch': 2} {'type': 'loss', 'content': 0.16660906374454498, 'timestamp': '2025-10-01 04:34:31.791253', 'step': 12850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:31.844711', 'step': 12850, 'epoch': 2} {'type': 'loss', 'content': 0.09783416986465454, 'timestamp': '2025-10-01 04:34:31.846846', 'step': 12851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:31.900133', 'step': 12851, 'epoch': 2} {'type': 'loss', 'content': 0.18780092895030975, 'timestamp': '2025-10-01 04:34:31.905615', 'step': 12852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:31.959869', 'step': 12852, 'epoch': 2} {'type': 'loss', 'content': 0.06552256643772125, 'timestamp': '2025-10-01 04:34:31.962653', 'step': 12853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:32.017776', 'step': 12853, 'epoch': 2} {'type': 'loss', 'content': 0.07870166003704071, 'timestamp': '2025-10-01 04:34:32.020773', 'step': 12854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:32.074258', 'step': 12854, 'epoch': 2} {'type': 'loss', 'content': 0.16598455607891083, 'timestamp': '2025-10-01 04:34:32.076779', 'step': 12855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:32.130647', 'step': 12855, 'epoch': 2} {'type': 'loss', 'content': 0.14592672884464264, 'timestamp': '2025-10-01 04:34:32.136477', 'step': 12856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:32.191692', 'step': 12856, 'epoch': 2} {'type': 'loss', 'content': 0.09314073622226715, 'timestamp': '2025-10-01 04:34:32.194187', 'step': 12857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:32.247815', 'step': 12857, 'epoch': 2} {'type': 'loss', 'content': 0.16126838326454163, 'timestamp': '2025-10-01 04:34:32.250083', 'step': 12858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:32.303231', 'step': 12858, 'epoch': 2} {'type': 'loss', 'content': 0.15814760327339172, 'timestamp': '2025-10-01 04:34:32.306446', 'step': 12859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:32.360646', 'step': 12859, 'epoch': 2} {'type': 'loss', 'content': 0.09575401991605759, 'timestamp': '2025-10-01 04:34:32.367162', 'step': 12860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:32.424159', 'step': 12860, 'epoch': 2} {'type': 'loss', 'content': 0.08934883773326874, 'timestamp': '2025-10-01 04:34:32.426720', 'step': 12861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:32.480518', 'step': 12861, 'epoch': 2} {'type': 'loss', 'content': 0.10887739062309265, 'timestamp': '2025-10-01 04:34:32.482655', 'step': 12862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:34:32.536197', 'step': 12862, 'epoch': 2} {'type': 'loss', 'content': 0.08815916627645493, 'timestamp': '2025-10-01 04:34:32.538374', 'step': 12863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:32.592085', 'step': 12863, 'epoch': 2} {'type': 'loss', 'content': 0.08937481790781021, 'timestamp': '2025-10-01 04:34:32.599726', 'step': 12864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:32.652738', 'step': 12864, 'epoch': 2} {'type': 'loss', 'content': 0.06226717680692673, 'timestamp': '2025-10-01 04:34:32.655814', 'step': 12865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:32.710426', 'step': 12865, 'epoch': 2} {'type': 'loss', 'content': 0.1121334582567215, 'timestamp': '2025-10-01 04:34:32.713380', 'step': 12866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:32.767817', 'step': 12866, 'epoch': 2} {'type': 'loss', 'content': 0.09901472926139832, 'timestamp': '2025-10-01 04:34:32.769964', 'step': 12867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:32.823177', 'step': 12867, 'epoch': 2} {'type': 'loss', 'content': 0.08427125215530396, 'timestamp': '2025-10-01 04:34:32.828653', 'step': 12868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:32.885108', 'step': 12868, 'epoch': 2} {'type': 'loss', 'content': 0.06095164641737938, 'timestamp': '2025-10-01 04:34:32.887031', 'step': 12869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:32.946528', 'step': 12869, 'epoch': 2} {'type': 'loss', 'content': 0.10267314314842224, 'timestamp': '2025-10-01 04:34:32.948916', 'step': 12870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:33.003759', 'step': 12870, 'epoch': 2} {'type': 'loss', 'content': 0.13085220754146576, 'timestamp': '2025-10-01 04:34:33.006477', 'step': 12871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:34:33.061188', 'step': 12871, 'epoch': 2} {'type': 'loss', 'content': 0.1664300262928009, 'timestamp': '2025-10-01 04:34:33.067350', 'step': 12872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:33.121190', 'step': 12872, 'epoch': 2} {'type': 'loss', 'content': 0.08453787863254547, 'timestamp': '2025-10-01 04:34:33.123325', 'step': 12873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:33.177536', 'step': 12873, 'epoch': 2} {'type': 'loss', 'content': 0.08577468246221542, 'timestamp': '2025-10-01 04:34:33.179929', 'step': 12874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:33.233437', 'step': 12874, 'epoch': 2} {'type': 'loss', 'content': 0.14410272240638733, 'timestamp': '2025-10-01 04:34:33.235632', 'step': 12875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:33.289567', 'step': 12875, 'epoch': 2} {'type': 'loss', 'content': 0.21899181604385376, 'timestamp': '2025-10-01 04:34:33.295600', 'step': 12876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:33.349137', 'step': 12876, 'epoch': 2} {'type': 'loss', 'content': 0.11678554862737656, 'timestamp': '2025-10-01 04:34:33.351482', 'step': 12877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:34:33.406795', 'step': 12877, 'epoch': 2} {'type': 'loss', 'content': 0.17573601007461548, 'timestamp': '2025-10-01 04:34:33.408837', 'step': 12878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:33.475728', 'step': 12878, 'epoch': 2} {'type': 'loss', 'content': 0.07668443024158478, 'timestamp': '2025-10-01 04:34:33.478324', 'step': 12879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:33.534035', 'step': 12879, 'epoch': 2} {'type': 'loss', 'content': 0.0805499255657196, 'timestamp': '2025-10-01 04:34:33.539807', 'step': 12880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:33.594300', 'step': 12880, 'epoch': 2} {'type': 'loss', 'content': 0.08232972025871277, 'timestamp': '2025-10-01 04:34:33.597132', 'step': 12881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:33.651677', 'step': 12881, 'epoch': 2} {'type': 'loss', 'content': 0.1900993138551712, 'timestamp': '2025-10-01 04:34:33.653844', 'step': 12882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:33.708199', 'step': 12882, 'epoch': 2} {'type': 'loss', 'content': 0.054263923317193985, 'timestamp': '2025-10-01 04:34:33.710452', 'step': 12883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:33.764837', 'step': 12883, 'epoch': 2} {'type': 'loss', 'content': 0.14255093038082123, 'timestamp': '2025-10-01 04:34:33.771208', 'step': 12884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:34:33.827407', 'step': 12884, 'epoch': 2} {'type': 'loss', 'content': 0.11715177446603775, 'timestamp': '2025-10-01 04:34:33.830020', 'step': 12885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:33.884826', 'step': 12885, 'epoch': 2} {'type': 'loss', 'content': 0.07431987673044205, 'timestamp': '2025-10-01 04:34:33.887457', 'step': 12886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:33.941941', 'step': 12886, 'epoch': 2} {'type': 'loss', 'content': 0.06390027701854706, 'timestamp': '2025-10-01 04:34:33.944637', 'step': 12887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:33.998529', 'step': 12887, 'epoch': 2} {'type': 'loss', 'content': 0.17736445367336273, 'timestamp': '2025-10-01 04:34:34.009890', 'step': 12888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:34.068331', 'step': 12888, 'epoch': 2} {'type': 'loss', 'content': 0.06273823231458664, 'timestamp': '2025-10-01 04:34:34.070906', 'step': 12889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:34.124592', 'step': 12889, 'epoch': 2} {'type': 'loss', 'content': 0.06499674171209335, 'timestamp': '2025-10-01 04:34:34.127119', 'step': 12890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:34.182548', 'step': 12890, 'epoch': 2} {'type': 'loss', 'content': 0.09778966009616852, 'timestamp': '2025-10-01 04:34:34.185512', 'step': 12891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:34.238991', 'step': 12891, 'epoch': 2} {'type': 'loss', 'content': 0.0998026430606842, 'timestamp': '2025-10-01 04:34:34.245014', 'step': 12892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:34.299254', 'step': 12892, 'epoch': 2} {'type': 'loss', 'content': 0.11217255890369415, 'timestamp': '2025-10-01 04:34:34.301794', 'step': 12893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:34.373067', 'step': 12893, 'epoch': 2} {'type': 'loss', 'content': 0.11122965067625046, 'timestamp': '2025-10-01 04:34:34.375349', 'step': 12894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:34.436842', 'step': 12894, 'epoch': 2} {'type': 'loss', 'content': 0.16338476538658142, 'timestamp': '2025-10-01 04:34:34.438873', 'step': 12895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:34.492553', 'step': 12895, 'epoch': 2} {'type': 'loss', 'content': 0.0829944983124733, 'timestamp': '2025-10-01 04:34:34.498343', 'step': 12896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:34.551034', 'step': 12896, 'epoch': 2} {'type': 'loss', 'content': 0.11376005411148071, 'timestamp': '2025-10-01 04:34:34.553282', 'step': 12897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:34.610787', 'step': 12897, 'epoch': 2} {'type': 'loss', 'content': 0.13225820660591125, 'timestamp': '2025-10-01 04:34:34.612906', 'step': 12898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:34.673924', 'step': 12898, 'epoch': 2} {'type': 'loss', 'content': 0.10345548391342163, 'timestamp': '2025-10-01 04:34:34.676860', 'step': 12899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:34.729507', 'step': 12899, 'epoch': 2} {'type': 'loss', 'content': 0.09805100411176682, 'timestamp': '2025-10-01 04:34:34.735251', 'step': 12900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:34.787837', 'step': 12900, 'epoch': 2} {'type': 'loss', 'content': 0.17691011726856232, 'timestamp': '2025-10-01 04:34:34.789930', 'step': 12901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:34.842960', 'step': 12901, 'epoch': 2} {'type': 'loss', 'content': 0.11728499084711075, 'timestamp': '2025-10-01 04:34:34.844915', 'step': 12902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:34.899552', 'step': 12902, 'epoch': 2} {'type': 'loss', 'content': 0.10763447731733322, 'timestamp': '2025-10-01 04:34:34.901516', 'step': 12903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:34.965815', 'step': 12903, 'epoch': 2} {'type': 'loss', 'content': 0.09739135950803757, 'timestamp': '2025-10-01 04:34:34.971268', 'step': 12904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:35.023889', 'step': 12904, 'epoch': 2} {'type': 'loss', 'content': 0.10349258780479431, 'timestamp': '2025-10-01 04:34:35.025813', 'step': 12905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:35.079125', 'step': 12905, 'epoch': 2} {'type': 'loss', 'content': 0.12916474044322968, 'timestamp': '2025-10-01 04:34:35.081571', 'step': 12906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:35.134825', 'step': 12906, 'epoch': 2} {'type': 'loss', 'content': 0.07686517387628555, 'timestamp': '2025-10-01 04:34:35.137176', 'step': 12907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:35.190108', 'step': 12907, 'epoch': 2} {'type': 'loss', 'content': 0.08622725307941437, 'timestamp': '2025-10-01 04:34:35.195610', 'step': 12908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:35.249447', 'step': 12908, 'epoch': 2} {'type': 'loss', 'content': 0.1302173137664795, 'timestamp': '2025-10-01 04:34:35.251770', 'step': 12909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:34:35.305290', 'step': 12909, 'epoch': 2} {'type': 'loss', 'content': 0.10263755917549133, 'timestamp': '2025-10-01 04:34:35.307279', 'step': 12910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:35.362186', 'step': 12910, 'epoch': 2} {'type': 'loss', 'content': 0.18047469854354858, 'timestamp': '2025-10-01 04:34:35.364203', 'step': 12911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:35.417937', 'step': 12911, 'epoch': 2} {'type': 'loss', 'content': 0.08111976832151413, 'timestamp': '2025-10-01 04:34:35.424269', 'step': 12912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:34:35.477828', 'step': 12912, 'epoch': 2} {'type': 'loss', 'content': 0.1060808077454567, 'timestamp': '2025-10-01 04:34:35.479980', 'step': 12913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:35.533363', 'step': 12913, 'epoch': 2} {'type': 'loss', 'content': 0.199917733669281, 'timestamp': '2025-10-01 04:34:35.535480', 'step': 12914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:35.588578', 'step': 12914, 'epoch': 2} {'type': 'loss', 'content': 0.11621127277612686, 'timestamp': '2025-10-01 04:34:35.590886', 'step': 12915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:35.645467', 'step': 12915, 'epoch': 2} {'type': 'loss', 'content': 0.06852713227272034, 'timestamp': '2025-10-01 04:34:35.655135', 'step': 12916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:35.707863', 'step': 12916, 'epoch': 2} {'type': 'loss', 'content': 0.09304630756378174, 'timestamp': '2025-10-01 04:34:35.709736', 'step': 12917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:35.762137', 'step': 12917, 'epoch': 2} {'type': 'loss', 'content': 0.2344592660665512, 'timestamp': '2025-10-01 04:34:35.763996', 'step': 12918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:35.817656', 'step': 12918, 'epoch': 2} {'type': 'loss', 'content': 0.09390376508235931, 'timestamp': '2025-10-01 04:34:35.819540', 'step': 12919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:35.874722', 'step': 12919, 'epoch': 2} {'type': 'loss', 'content': 0.1522834151983261, 'timestamp': '2025-10-01 04:34:35.880347', 'step': 12920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:34:35.932727', 'step': 12920, 'epoch': 2} {'type': 'loss', 'content': 0.08787227421998978, 'timestamp': '2025-10-01 04:34:35.935542', 'step': 12921, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:35.990674', 'step': 12921, 'epoch': 2} {'type': 'loss', 'content': 0.10526865720748901, 'timestamp': '2025-10-01 04:34:35.992825', 'step': 12922, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:36.045884', 'step': 12922, 'epoch': 2} {'type': 'loss', 'content': 0.085942842066288, 'timestamp': '2025-10-01 04:34:36.047800', 'step': 12923, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:36.101848', 'step': 12923, 'epoch': 2} {'type': 'loss', 'content': 0.05111324042081833, 'timestamp': '2025-10-01 04:34:36.108593', 'step': 12924, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:36.162258', 'step': 12924, 'epoch': 2} {'type': 'loss', 'content': 0.059964582324028015, 'timestamp': '2025-10-01 04:34:36.166277', 'step': 12925, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:36.223748', 'step': 12925, 'epoch': 2} {'type': 'loss', 'content': 0.1606450378894806, 'timestamp': '2025-10-01 04:34:36.226132', 'step': 12926, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:36.279924', 'step': 12926, 'epoch': 2} {'type': 'loss', 'content': 0.10990434885025024, 'timestamp': '2025-10-01 04:34:36.282265', 'step': 12927, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:36.336882', 'step': 12927, 'epoch': 2} {'type': 'loss', 'content': 0.17979159951210022, 'timestamp': '2025-10-01 04:34:36.343104', 'step': 12928, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:34:36.396457', 'step': 12928, 'epoch': 2} {'type': 'loss', 'content': 0.0616907961666584, 'timestamp': '2025-10-01 04:34:36.398562', 'step': 12929, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:36.452634', 'step': 12929, 'epoch': 2} {'type': 'loss', 'content': 0.20152224600315094, 'timestamp': '2025-10-01 04:34:36.454832', 'step': 12930, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:36.507528', 'step': 12930, 'epoch': 2} {'type': 'loss', 'content': 0.10895611345767975, 'timestamp': '2025-10-01 04:34:36.509975', 'step': 12931, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:36.564455', 'step': 12931, 'epoch': 2} {'type': 'loss', 'content': 0.07220818847417831, 'timestamp': '2025-10-01 04:34:36.571289', 'step': 12932, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:36.624452', 'step': 12932, 'epoch': 2} {'type': 'loss', 'content': 0.1189069151878357, 'timestamp': '2025-10-01 04:34:36.626901', 'step': 12933, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:36.679915', 'step': 12933, 'epoch': 2} {'type': 'loss', 'content': 0.1691511869430542, 'timestamp': '2025-10-01 04:34:36.681917', 'step': 12934, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:36.735026', 'step': 12934, 'epoch': 2} {'type': 'loss', 'content': 0.06718567758798599, 'timestamp': '2025-10-01 04:34:36.738200', 'step': 12935, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:36.791773', 'step': 12935, 'epoch': 2} {'type': 'loss', 'content': 0.068110391497612, 'timestamp': '2025-10-01 04:34:36.797970', 'step': 12936, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:36.852347', 'step': 12936, 'epoch': 2} {'type': 'loss', 'content': 0.22600868344306946, 'timestamp': '2025-10-01 04:34:36.857118', 'step': 12937, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:36.912117', 'step': 12937, 'epoch': 2} {'type': 'loss', 'content': 0.15852400660514832, 'timestamp': '2025-10-01 04:34:36.914028', 'step': 12938, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:36.967643', 'step': 12938, 'epoch': 2} {'type': 'loss', 'content': 0.1237248107790947, 'timestamp': '2025-10-01 04:34:36.969690', 'step': 12939, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:37.023302', 'step': 12939, 'epoch': 2} {'type': 'loss', 'content': 0.09459206461906433, 'timestamp': '2025-10-01 04:34:37.028878', 'step': 12940, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:37.082451', 'step': 12940, 'epoch': 2} {'type': 'loss', 'content': 0.2167641818523407, 'timestamp': '2025-10-01 04:34:37.092253', 'step': 12941, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:37.147995', 'step': 12941, 'epoch': 2} {'type': 'loss', 'content': 0.06782282143831253, 'timestamp': '2025-10-01 04:34:37.151009', 'step': 12942, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:37.206943', 'step': 12942, 'epoch': 2} {'type': 'loss', 'content': 0.08819174021482468, 'timestamp': '2025-10-01 04:34:37.209653', 'step': 12943, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:37.262908', 'step': 12943, 'epoch': 2} {'type': 'loss', 'content': 0.1512949913740158, 'timestamp': '2025-10-01 04:34:37.268613', 'step': 12944, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:37.322062', 'step': 12944, 'epoch': 2} {'type': 'loss', 'content': 0.08297169208526611, 'timestamp': '2025-10-01 04:34:37.324160', 'step': 12945, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:37.378173', 'step': 12945, 'epoch': 2} {'type': 'loss', 'content': 0.2088921070098877, 'timestamp': '2025-10-01 04:34:37.380075', 'step': 12946, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:37.433486', 'step': 12946, 'epoch': 2} {'type': 'loss', 'content': 0.11952464282512665, 'timestamp': '2025-10-01 04:34:37.435471', 'step': 12947, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:37.489104', 'step': 12947, 'epoch': 2} {'type': 'loss', 'content': 0.11324749886989594, 'timestamp': '2025-10-01 04:34:37.494814', 'step': 12948, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:37.547694', 'step': 12948, 'epoch': 2} {'type': 'loss', 'content': 0.07306298613548279, 'timestamp': '2025-10-01 04:34:37.550276', 'step': 12949, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:37.603132', 'step': 12949, 'epoch': 2} {'type': 'loss', 'content': 0.14899715781211853, 'timestamp': '2025-10-01 04:34:37.605288', 'step': 12950, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:37.659679', 'step': 12950, 'epoch': 2} {'type': 'loss', 'content': 0.04477497562766075, 'timestamp': '2025-10-01 04:34:37.661709', 'step': 12951, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:37.715419', 'step': 12951, 'epoch': 2} {'type': 'loss', 'content': 0.08428069949150085, 'timestamp': '2025-10-01 04:34:37.721092', 'step': 12952, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:37.774299', 'step': 12952, 'epoch': 2} {'type': 'loss', 'content': 0.1734333336353302, 'timestamp': '2025-10-01 04:34:37.778414', 'step': 12953, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:34:37.834280', 'step': 12953, 'epoch': 2} {'type': 'loss', 'content': 0.0976940169930458, 'timestamp': '2025-10-01 04:34:37.837443', 'step': 12954, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:37.891949', 'step': 12954, 'epoch': 2} {'type': 'loss', 'content': 0.14500032365322113, 'timestamp': '2025-10-01 04:34:37.894960', 'step': 12955, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:37.948597', 'step': 12955, 'epoch': 2} {'type': 'loss', 'content': 0.1962399184703827, 'timestamp': '2025-10-01 04:34:37.954412', 'step': 12956, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:38.007928', 'step': 12956, 'epoch': 2} {'type': 'loss', 'content': 0.12458211928606033, 'timestamp': '2025-10-01 04:34:38.009971', 'step': 12957, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:38.062991', 'step': 12957, 'epoch': 2} {'type': 'loss', 'content': 0.06611357629299164, 'timestamp': '2025-10-01 04:34:38.065154', 'step': 12958, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:38.119174', 'step': 12958, 'epoch': 2} {'type': 'loss', 'content': 0.12022166699171066, 'timestamp': '2025-10-01 04:34:38.134241', 'step': 12959, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:34:38.188393', 'step': 12959, 'epoch': 2} {'type': 'loss', 'content': 0.12262355536222458, 'timestamp': '2025-10-01 04:34:38.194011', 'step': 12960, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:38.247833', 'step': 12960, 'epoch': 2} {'type': 'loss', 'content': 0.11835139989852905, 'timestamp': '2025-10-01 04:34:38.250024', 'step': 12961, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:38.315696', 'step': 12961, 'epoch': 2} {'type': 'loss', 'content': 0.2266312837600708, 'timestamp': '2025-10-01 04:34:38.317898', 'step': 12962, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:38.370929', 'step': 12962, 'epoch': 2} {'type': 'loss', 'content': 0.013474964536726475, 'timestamp': '2025-10-01 04:34:38.386385', 'step': 12963, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:38.442588', 'step': 12963, 'epoch': 2} {'type': 'loss', 'content': 0.09709545224905014, 'timestamp': '2025-10-01 04:34:38.448175', 'step': 12964, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:34:38.501675', 'step': 12964, 'epoch': 2} {'type': 'loss', 'content': 0.10707661509513855, 'timestamp': '2025-10-01 04:34:38.503564', 'step': 12965, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:34:38.558179', 'step': 12965, 'epoch': 2} {'type': 'loss', 'content': 0.1689954549074173, 'timestamp': '2025-10-01 04:34:38.560331', 'step': 12966, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:38.614070', 'step': 12966, 'epoch': 2} {'type': 'loss', 'content': 0.15538743138313293, 'timestamp': '2025-10-01 04:34:38.616203', 'step': 12967, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:38.672634', 'step': 12967, 'epoch': 2} {'type': 'loss', 'content': 0.12754781544208527, 'timestamp': '2025-10-01 04:34:38.678167', 'step': 12968, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:38.731024', 'step': 12968, 'epoch': 2} {'type': 'loss', 'content': 0.15123790502548218, 'timestamp': '2025-10-01 04:34:38.733219', 'step': 12969, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:38.786443', 'step': 12969, 'epoch': 2} {'type': 'loss', 'content': 0.12162270396947861, 'timestamp': '2025-10-01 04:34:38.788596', 'step': 12970, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:38.841753', 'step': 12970, 'epoch': 2} {'type': 'loss', 'content': 0.13342803716659546, 'timestamp': '2025-10-01 04:34:38.844472', 'step': 12971, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:38.897913', 'step': 12971, 'epoch': 2} {'type': 'loss', 'content': 0.09148640185594559, 'timestamp': '2025-10-01 04:34:38.903669', 'step': 12972, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:34:38.956427', 'step': 12972, 'epoch': 2} {'type': 'loss', 'content': 0.14287517964839935, 'timestamp': '2025-10-01 04:34:38.958442', 'step': 12973, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:39.011867', 'step': 12973, 'epoch': 2} {'type': 'loss', 'content': 0.08669598400592804, 'timestamp': '2025-10-01 04:34:39.013992', 'step': 12974, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:34:39.067224', 'step': 12974, 'epoch': 2} {'type': 'loss', 'content': 0.10625633597373962, 'timestamp': '2025-10-01 04:34:39.070864', 'step': 12975, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:39.127547', 'step': 12975, 'epoch': 2} {'type': 'loss', 'content': 0.06071939319372177, 'timestamp': '2025-10-01 04:34:39.133170', 'step': 12976, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:34:39.185453', 'step': 12976, 'epoch': 2} {'type': 'loss', 'content': 0.11557040363550186, 'timestamp': '2025-10-01 04:34:39.187597', 'step': 12977, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:39.241116', 'step': 12977, 'epoch': 2} {'type': 'loss', 'content': 0.13532927632331848, 'timestamp': '2025-10-01 04:34:39.243012', 'step': 12978, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:39.296641', 'step': 12978, 'epoch': 2} {'type': 'loss', 'content': 0.12452433258295059, 'timestamp': '2025-10-01 04:34:39.298864', 'step': 12979, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:39.352433', 'step': 12979, 'epoch': 2} {'type': 'loss', 'content': 0.0736655667424202, 'timestamp': '2025-10-01 04:34:39.358066', 'step': 12980, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:39.410723', 'step': 12980, 'epoch': 2} {'type': 'loss', 'content': 0.10971209406852722, 'timestamp': '2025-10-01 04:34:39.413146', 'step': 12981, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:39.466038', 'step': 12981, 'epoch': 2} {'type': 'loss', 'content': 0.06553423404693604, 'timestamp': '2025-10-01 04:34:39.467942', 'step': 12982, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:34:39.521725', 'step': 12982, 'epoch': 2} {'type': 'loss', 'content': 0.11574460566043854, 'timestamp': '2025-10-01 04:34:39.524529', 'step': 12983, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:39.578742', 'step': 12983, 'epoch': 2} {'type': 'loss', 'content': 0.10171312093734741, 'timestamp': '2025-10-01 04:34:39.584466', 'step': 12984, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:39.637837', 'step': 12984, 'epoch': 2} {'type': 'loss', 'content': 0.0720437616109848, 'timestamp': '2025-10-01 04:34:39.639901', 'step': 12985, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-01 04:34:39.693040', 'step': 12985, 'epoch': 2} {'type': 'loss', 'content': 0.19649213552474976, 'timestamp': '2025-10-01 04:34:39.695341', 'step': 12986, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:34:39.749243', 'step': 12986, 'epoch': 2} {'type': 'loss', 'content': 0.10949315130710602, 'timestamp': '2025-10-01 04:34:39.761886', 'step': 12987, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:39.815104', 'step': 12987, 'epoch': 2} {'type': 'loss', 'content': 0.11333463340997696, 'timestamp': '2025-10-01 04:34:39.820651', 'step': 12988, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:39.873217', 'step': 12988, 'epoch': 2} {'type': 'loss', 'content': 0.07407169789075851, 'timestamp': '2025-10-01 04:34:39.875456', 'step': 12989, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:39.928724', 'step': 12989, 'epoch': 2} {'type': 'loss', 'content': 0.0824536383152008, 'timestamp': '2025-10-01 04:34:39.931164', 'step': 12990, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:39.984872', 'step': 12990, 'epoch': 2} {'type': 'loss', 'content': 0.11744198948144913, 'timestamp': '2025-10-01 04:34:39.987485', 'step': 12991, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:40.040638', 'step': 12991, 'epoch': 2} {'type': 'loss', 'content': 0.19360370934009552, 'timestamp': '2025-10-01 04:34:40.046414', 'step': 12992, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:40.099505', 'step': 12992, 'epoch': 2} {'type': 'loss', 'content': 0.11715856194496155, 'timestamp': '2025-10-01 04:34:40.101543', 'step': 12993, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:40.155409', 'step': 12993, 'epoch': 2} {'type': 'loss', 'content': 0.19322684407234192, 'timestamp': '2025-10-01 04:34:40.157976', 'step': 12994, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:34:40.211648', 'step': 12994, 'epoch': 2} {'type': 'loss', 'content': 0.14788652956485748, 'timestamp': '2025-10-01 04:34:40.213547', 'step': 12995, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:40.267966', 'step': 12995, 'epoch': 2} {'type': 'loss', 'content': 0.14067548513412476, 'timestamp': '2025-10-01 04:34:40.273636', 'step': 12996, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:40.326836', 'step': 12996, 'epoch': 2} {'type': 'loss', 'content': 0.10276897251605988, 'timestamp': '2025-10-01 04:34:40.329065', 'step': 12997, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:40.382527', 'step': 12997, 'epoch': 2} {'type': 'loss', 'content': 0.1636575609445572, 'timestamp': '2025-10-01 04:34:40.384618', 'step': 12998, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:34:40.438640', 'step': 12998, 'epoch': 2} {'type': 'loss', 'content': 0.18465499579906464, 'timestamp': '2025-10-01 04:34:40.451600', 'step': 12999, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:34:40.508171', 'step': 12999, 'epoch': 2} {'type': 'loss', 'content': 0.10865645110607147, 'timestamp': '2025-10-01 04:34:40.515843', 'step': 13000, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 13000', 'timestamp': '2025-10-01 04:34:40.893709', 'step': 13000, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:34:40.953494', 'step': 13000, 'epoch': 2} {'type': 'loss', 'content': 0.06366576999425888, 'timestamp': '2025-10-01 04:34:40.955731', 'step': 13001, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:41.024177', 'step': 13001, 'epoch': 2} {'type': 'loss', 'content': 0.12408988922834396, 'timestamp': '2025-10-01 04:34:41.035142', 'step': 13002, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:41.090757', 'step': 13002, 'epoch': 2} {'type': 'loss', 'content': 0.11131332814693451, 'timestamp': '2025-10-01 04:34:41.092593', 'step': 13003, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:41.147132', 'step': 13003, 'epoch': 2} {'type': 'loss', 'content': 0.10965535044670105, 'timestamp': '2025-10-01 04:34:41.152892', 'step': 13004, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:41.205507', 'step': 13004, 'epoch': 2} {'type': 'loss', 'content': 0.0854894369840622, 'timestamp': '2025-10-01 04:34:41.207724', 'step': 13005, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:41.260728', 'step': 13005, 'epoch': 2} {'type': 'loss', 'content': 0.15650375187397003, 'timestamp': '2025-10-01 04:34:41.262946', 'step': 13006, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:41.316546', 'step': 13006, 'epoch': 2} {'type': 'loss', 'content': 0.07533431053161621, 'timestamp': '2025-10-01 04:34:41.319022', 'step': 13007, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:41.373162', 'step': 13007, 'epoch': 2} {'type': 'loss', 'content': 0.14836324751377106, 'timestamp': '2025-10-01 04:34:41.379319', 'step': 13008, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:41.437141', 'step': 13008, 'epoch': 2} {'type': 'loss', 'content': 0.15348243713378906, 'timestamp': '2025-10-01 04:34:41.439048', 'step': 13009, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:41.493131', 'step': 13009, 'epoch': 2} {'type': 'loss', 'content': 0.10852430015802383, 'timestamp': '2025-10-01 04:34:41.495151', 'step': 13010, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:41.548973', 'step': 13010, 'epoch': 2} {'type': 'loss', 'content': 0.22194114327430725, 'timestamp': '2025-10-01 04:34:41.561043', 'step': 13011, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:41.624812', 'step': 13011, 'epoch': 2} {'type': 'loss', 'content': 0.20894567668437958, 'timestamp': '2025-10-01 04:34:41.633374', 'step': 13012, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:41.696566', 'step': 13012, 'epoch': 2} {'type': 'loss', 'content': 0.2230227291584015, 'timestamp': '2025-10-01 04:34:41.698703', 'step': 13013, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:41.752918', 'step': 13013, 'epoch': 2} {'type': 'loss', 'content': 0.12717022001743317, 'timestamp': '2025-10-01 04:34:41.754968', 'step': 13014, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:41.809535', 'step': 13014, 'epoch': 2} {'type': 'loss', 'content': 0.09553370624780655, 'timestamp': '2025-10-01 04:34:41.811455', 'step': 13015, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:41.868925', 'step': 13015, 'epoch': 2} {'type': 'loss', 'content': 0.1112934947013855, 'timestamp': '2025-10-01 04:34:41.874885', 'step': 13016, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:41.941675', 'step': 13016, 'epoch': 2} {'type': 'loss', 'content': 0.1272408664226532, 'timestamp': '2025-10-01 04:34:41.943942', 'step': 13017, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:34:41.998053', 'step': 13017, 'epoch': 2} {'type': 'loss', 'content': 0.12459731101989746, 'timestamp': '2025-10-01 04:34:42.000720', 'step': 13018, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:42.056602', 'step': 13018, 'epoch': 2} {'type': 'loss', 'content': 0.11444640159606934, 'timestamp': '2025-10-01 04:34:42.059200', 'step': 13019, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:42.113430', 'step': 13019, 'epoch': 2} {'type': 'loss', 'content': 0.06898065656423569, 'timestamp': '2025-10-01 04:34:42.119495', 'step': 13020, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:42.174985', 'step': 13020, 'epoch': 2} {'type': 'loss', 'content': 0.08573443442583084, 'timestamp': '2025-10-01 04:34:42.176932', 'step': 13021, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:42.233858', 'step': 13021, 'epoch': 2} {'type': 'loss', 'content': 0.0665992721915245, 'timestamp': '2025-10-01 04:34:42.238364', 'step': 13022, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:34:42.296042', 'step': 13022, 'epoch': 2} {'type': 'loss', 'content': 0.1349557489156723, 'timestamp': '2025-10-01 04:34:42.298866', 'step': 13023, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:34:42.355674', 'step': 13023, 'epoch': 2} {'type': 'loss', 'content': 0.08284219354391098, 'timestamp': '2025-10-01 04:34:42.362339', 'step': 13024, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:34:42.418131', 'step': 13024, 'epoch': 2} {'type': 'loss', 'content': 0.1410568505525589, 'timestamp': '2025-10-01 04:34:42.420824', 'step': 13025, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:42.476156', 'step': 13025, 'epoch': 2} {'type': 'loss', 'content': 0.2542685568332672, 'timestamp': '2025-10-01 04:34:42.478546', 'step': 13026, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:42.537418', 'step': 13026, 'epoch': 2} {'type': 'loss', 'content': 0.09213363379240036, 'timestamp': '2025-10-01 04:34:42.539894', 'step': 13027, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:34:42.595713', 'step': 13027, 'epoch': 2} {'type': 'loss', 'content': 0.10931415855884552, 'timestamp': '2025-10-01 04:34:42.602564', 'step': 13028, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:42.657128', 'step': 13028, 'epoch': 2} {'type': 'loss', 'content': 0.16198395192623138, 'timestamp': '2025-10-01 04:34:42.661420', 'step': 13029, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:42.716707', 'step': 13029, 'epoch': 2} {'type': 'loss', 'content': 0.13702061772346497, 'timestamp': '2025-10-01 04:34:42.718893', 'step': 13030, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:42.773680', 'step': 13030, 'epoch': 2} {'type': 'loss', 'content': 0.08102886378765106, 'timestamp': '2025-10-01 04:34:42.775882', 'step': 13031, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:34:42.830974', 'step': 13031, 'epoch': 2} {'type': 'loss', 'content': 0.10899525880813599, 'timestamp': '2025-10-01 04:34:42.836857', 'step': 13032, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:42.891582', 'step': 13032, 'epoch': 2} {'type': 'loss', 'content': 0.038499362766742706, 'timestamp': '2025-10-01 04:34:42.894051', 'step': 13033, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:42.948403', 'step': 13033, 'epoch': 2} {'type': 'loss', 'content': 0.11050806939601898, 'timestamp': '2025-10-01 04:34:42.950676', 'step': 13034, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:34:43.005024', 'step': 13034, 'epoch': 2} {'type': 'loss', 'content': 0.09643804281949997, 'timestamp': '2025-10-01 04:34:43.007523', 'step': 13035, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:43.061895', 'step': 13035, 'epoch': 2} {'type': 'loss', 'content': 0.0607401467859745, 'timestamp': '2025-10-01 04:34:43.067902', 'step': 13036, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:43.121166', 'step': 13036, 'epoch': 2} {'type': 'loss', 'content': 0.1354253888130188, 'timestamp': '2025-10-01 04:34:43.123645', 'step': 13037, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:43.177811', 'step': 13037, 'epoch': 2} {'type': 'loss', 'content': 0.10763602703809738, 'timestamp': '2025-10-01 04:34:43.180060', 'step': 13038, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:43.234336', 'step': 13038, 'epoch': 2} {'type': 'loss', 'content': 0.12861241400241852, 'timestamp': '2025-10-01 04:34:43.236666', 'step': 13039, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:34:43.290729', 'step': 13039, 'epoch': 2} {'type': 'loss', 'content': 0.16920940577983856, 'timestamp': '2025-10-01 04:34:43.296887', 'step': 13040, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:43.351199', 'step': 13040, 'epoch': 2} {'type': 'loss', 'content': 0.07742222398519516, 'timestamp': '2025-10-01 04:34:43.353719', 'step': 13041, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:43.411794', 'step': 13041, 'epoch': 2} {'type': 'loss', 'content': 0.07626400142908096, 'timestamp': '2025-10-01 04:34:43.414323', 'step': 13042, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:43.469491', 'step': 13042, 'epoch': 2} {'type': 'loss', 'content': 0.09029650688171387, 'timestamp': '2025-10-01 04:34:43.472297', 'step': 13043, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:43.527113', 'step': 13043, 'epoch': 2} {'type': 'loss', 'content': 0.14082659780979156, 'timestamp': '2025-10-01 04:34:43.533298', 'step': 13044, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:43.588179', 'step': 13044, 'epoch': 2} {'type': 'loss', 'content': 0.1843685805797577, 'timestamp': '2025-10-01 04:34:43.590558', 'step': 13045, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:43.645569', 'step': 13045, 'epoch': 2} {'type': 'loss', 'content': 0.028971200808882713, 'timestamp': '2025-10-01 04:34:43.647928', 'step': 13046, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:34:43.701441', 'step': 13046, 'epoch': 2} {'type': 'loss', 'content': 0.05592000111937523, 'timestamp': '2025-10-01 04:34:43.703955', 'step': 13047, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:43.757146', 'step': 13047, 'epoch': 2} {'type': 'loss', 'content': 0.06902985274791718, 'timestamp': '2025-10-01 04:34:43.762990', 'step': 13048, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-01 04:34:56.916279', 'step': 13048, 'epoch': 2} {'type': 'pplx', 'content': 12455.006334522042, 'timestamp': '2025-10-01 04:34:56.919871', 'step': 13048, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:56.974811', 'step': 13048, 'epoch': 2} {'type': 'loss', 'content': 0.10797057300806046, 'timestamp': '2025-10-01 04:34:56.978793', 'step': 13049, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:57.037693', 'step': 13049, 'epoch': 2} {'type': 'loss', 'content': 0.08152596652507782, 'timestamp': '2025-10-01 04:34:57.040056', 'step': 13050, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:57.098301', 'step': 13050, 'epoch': 2} {'type': 'loss', 'content': 0.15253323316574097, 'timestamp': '2025-10-01 04:34:57.100809', 'step': 13051, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:34:57.162371', 'step': 13051, 'epoch': 2} {'type': 'loss', 'content': 0.06513847410678864, 'timestamp': '2025-10-01 04:34:57.170381', 'step': 13052, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:57.223893', 'step': 13052, 'epoch': 2} {'type': 'loss', 'content': 0.12042305618524551, 'timestamp': '2025-10-01 04:34:57.226248', 'step': 13053, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:57.284755', 'step': 13053, 'epoch': 2} {'type': 'loss', 'content': 0.08708935976028442, 'timestamp': '2025-10-01 04:34:57.288889', 'step': 13054, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:57.347521', 'step': 13054, 'epoch': 2} {'type': 'loss', 'content': 0.12208481132984161, 'timestamp': '2025-10-01 04:34:57.349929', 'step': 13055, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:34:57.409222', 'step': 13055, 'epoch': 2} {'type': 'loss', 'content': 0.09777092188596725, 'timestamp': '2025-10-01 04:34:57.415434', 'step': 13056, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:57.471440', 'step': 13056, 'epoch': 2} {'type': 'loss', 'content': 0.26474180817604065, 'timestamp': '2025-10-01 04:34:57.473701', 'step': 13057, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:57.531569', 'step': 13057, 'epoch': 2} {'type': 'loss', 'content': 0.09534766525030136, 'timestamp': '2025-10-01 04:34:57.533805', 'step': 13058, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:57.601188', 'step': 13058, 'epoch': 2} {'type': 'loss', 'content': 0.11904579401016235, 'timestamp': '2025-10-01 04:34:57.603914', 'step': 13059, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:57.664332', 'step': 13059, 'epoch': 2} {'type': 'loss', 'content': 0.11588346213102341, 'timestamp': '2025-10-01 04:34:57.671528', 'step': 13060, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:57.733481', 'step': 13060, 'epoch': 2} {'type': 'loss', 'content': 0.1683216094970703, 'timestamp': '2025-10-01 04:34:57.736195', 'step': 13061, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:57.799194', 'step': 13061, 'epoch': 2} {'type': 'loss', 'content': 0.1101599633693695, 'timestamp': '2025-10-01 04:34:57.802875', 'step': 13062, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:57.866540', 'step': 13062, 'epoch': 2} {'type': 'loss', 'content': 0.15015307068824768, 'timestamp': '2025-10-01 04:34:57.870191', 'step': 13063, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:57.931373', 'step': 13063, 'epoch': 2} {'type': 'loss', 'content': 0.11030510067939758, 'timestamp': '2025-10-01 04:34:57.939768', 'step': 13064, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:57.999625', 'step': 13064, 'epoch': 2} {'type': 'loss', 'content': 0.13438154757022858, 'timestamp': '2025-10-01 04:34:58.002663', 'step': 13065, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:58.065789', 'step': 13065, 'epoch': 2} {'type': 'loss', 'content': 0.04952327162027359, 'timestamp': '2025-10-01 04:34:58.068170', 'step': 13066, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:34:58.132398', 'step': 13066, 'epoch': 2} {'type': 'loss', 'content': 0.13146618008613586, 'timestamp': '2025-10-01 04:34:58.134722', 'step': 13067, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:58.192149', 'step': 13067, 'epoch': 2} {'type': 'loss', 'content': 0.14765745401382446, 'timestamp': '2025-10-01 04:34:58.198520', 'step': 13068, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:58.254793', 'step': 13068, 'epoch': 2} {'type': 'loss', 'content': 0.09489612281322479, 'timestamp': '2025-10-01 04:34:58.256978', 'step': 13069, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:34:58.310694', 'step': 13069, 'epoch': 2} {'type': 'loss', 'content': 0.07123774290084839, 'timestamp': '2025-10-01 04:34:58.312899', 'step': 13070, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:58.365976', 'step': 13070, 'epoch': 2} {'type': 'loss', 'content': 0.08626312762498856, 'timestamp': '2025-10-01 04:34:58.368586', 'step': 13071, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:34:58.427246', 'step': 13071, 'epoch': 2} {'type': 'loss', 'content': 0.10297654569149017, 'timestamp': '2025-10-01 04:34:58.433567', 'step': 13072, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:58.486006', 'step': 13072, 'epoch': 2} {'type': 'loss', 'content': 0.11651238054037094, 'timestamp': '2025-10-01 04:34:58.488069', 'step': 13073, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:58.541189', 'step': 13073, 'epoch': 2} {'type': 'loss', 'content': 0.12758280336856842, 'timestamp': '2025-10-01 04:34:58.543259', 'step': 13074, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:58.597930', 'step': 13074, 'epoch': 2} {'type': 'loss', 'content': 0.1019044816493988, 'timestamp': '2025-10-01 04:34:58.600040', 'step': 13075, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:58.653610', 'step': 13075, 'epoch': 2} {'type': 'loss', 'content': 0.07216393202543259, 'timestamp': '2025-10-01 04:34:58.659200', 'step': 13076, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:58.712435', 'step': 13076, 'epoch': 2} {'type': 'loss', 'content': 0.10288216173648834, 'timestamp': '2025-10-01 04:34:58.714448', 'step': 13077, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:58.770354', 'step': 13077, 'epoch': 2} {'type': 'loss', 'content': 0.15464940667152405, 'timestamp': '2025-10-01 04:34:58.772328', 'step': 13078, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:34:58.825435', 'step': 13078, 'epoch': 2} {'type': 'loss', 'content': 0.073985256254673, 'timestamp': '2025-10-01 04:34:58.827580', 'step': 13079, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:58.880769', 'step': 13079, 'epoch': 2} {'type': 'loss', 'content': 0.15970587730407715, 'timestamp': '2025-10-01 04:34:58.886577', 'step': 13080, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:58.941423', 'step': 13080, 'epoch': 2} {'type': 'loss', 'content': 0.17880889773368835, 'timestamp': '2025-10-01 04:34:58.943825', 'step': 13081, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:34:58.998344', 'step': 13081, 'epoch': 2} {'type': 'loss', 'content': 0.13950707018375397, 'timestamp': '2025-10-01 04:34:59.000467', 'step': 13082, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:59.053918', 'step': 13082, 'epoch': 2} {'type': 'loss', 'content': 0.1042000949382782, 'timestamp': '2025-10-01 04:34:59.055900', 'step': 13083, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:59.109145', 'step': 13083, 'epoch': 2} {'type': 'loss', 'content': 0.13598182797431946, 'timestamp': '2025-10-01 04:34:59.115052', 'step': 13084, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:59.175917', 'step': 13084, 'epoch': 2} {'type': 'loss', 'content': 0.15888337790966034, 'timestamp': '2025-10-01 04:34:59.177832', 'step': 13085, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:59.230755', 'step': 13085, 'epoch': 2} {'type': 'loss', 'content': 0.1830257624387741, 'timestamp': '2025-10-01 04:34:59.232644', 'step': 13086, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:59.286537', 'step': 13086, 'epoch': 2} {'type': 'loss', 'content': 0.24591407179832458, 'timestamp': '2025-10-01 04:34:59.288525', 'step': 13087, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:59.341684', 'step': 13087, 'epoch': 2} {'type': 'loss', 'content': 0.11804179102182388, 'timestamp': '2025-10-01 04:34:59.347522', 'step': 13088, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:59.400653', 'step': 13088, 'epoch': 2} {'type': 'loss', 'content': 0.1150737926363945, 'timestamp': '2025-10-01 04:34:59.402910', 'step': 13089, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:59.456031', 'step': 13089, 'epoch': 2} {'type': 'loss', 'content': 0.145621657371521, 'timestamp': '2025-10-01 04:34:59.458172', 'step': 13090, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:59.511848', 'step': 13090, 'epoch': 2} {'type': 'loss', 'content': 0.128475159406662, 'timestamp': '2025-10-01 04:34:59.514019', 'step': 13091, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:59.567249', 'step': 13091, 'epoch': 2} {'type': 'loss', 'content': 0.11872165650129318, 'timestamp': '2025-10-01 04:34:59.573950', 'step': 13092, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:59.626924', 'step': 13092, 'epoch': 2} {'type': 'loss', 'content': 0.15510690212249756, 'timestamp': '2025-10-01 04:34:59.628910', 'step': 13093, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:59.683750', 'step': 13093, 'epoch': 2} {'type': 'loss', 'content': 0.07844873517751694, 'timestamp': '2025-10-01 04:34:59.685667', 'step': 13094, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:59.738969', 'step': 13094, 'epoch': 2} {'type': 'loss', 'content': 0.1555497795343399, 'timestamp': '2025-10-01 04:34:59.743683', 'step': 13095, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:34:59.796966', 'step': 13095, 'epoch': 2} {'type': 'loss', 'content': 0.1468789279460907, 'timestamp': '2025-10-01 04:34:59.802564', 'step': 13096, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:34:59.855679', 'step': 13096, 'epoch': 2} {'type': 'loss', 'content': 0.10400576889514923, 'timestamp': '2025-10-01 04:34:59.857880', 'step': 13097, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:34:59.911343', 'step': 13097, 'epoch': 2} {'type': 'loss', 'content': 0.09526892751455307, 'timestamp': '2025-10-01 04:34:59.913423', 'step': 13098, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:34:59.966788', 'step': 13098, 'epoch': 2} {'type': 'loss', 'content': 0.16118134558200836, 'timestamp': '2025-10-01 04:34:59.969165', 'step': 13099, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:00.022847', 'step': 13099, 'epoch': 2} {'type': 'loss', 'content': 0.09029310196638107, 'timestamp': '2025-10-01 04:35:00.028662', 'step': 13100, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:00.081470', 'step': 13100, 'epoch': 2} {'type': 'loss', 'content': 0.09893617033958435, 'timestamp': '2025-10-01 04:35:00.083557', 'step': 13101, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:00.140716', 'step': 13101, 'epoch': 2} {'type': 'loss', 'content': 0.22599858045578003, 'timestamp': '2025-10-01 04:35:00.142953', 'step': 13102, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:35:00.203007', 'step': 13102, 'epoch': 2} {'type': 'loss', 'content': 0.11658337712287903, 'timestamp': '2025-10-01 04:35:00.205125', 'step': 13103, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:00.258365', 'step': 13103, 'epoch': 2} {'type': 'loss', 'content': 0.152932807803154, 'timestamp': '2025-10-01 04:35:00.264146', 'step': 13104, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:00.316988', 'step': 13104, 'epoch': 2} {'type': 'loss', 'content': 0.1292111873626709, 'timestamp': '2025-10-01 04:35:00.319283', 'step': 13105, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:00.372530', 'step': 13105, 'epoch': 2} {'type': 'loss', 'content': 0.14006729423999786, 'timestamp': '2025-10-01 04:35:00.374684', 'step': 13106, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:00.429475', 'step': 13106, 'epoch': 2} {'type': 'loss', 'content': 0.06376534700393677, 'timestamp': '2025-10-01 04:35:00.431386', 'step': 13107, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:35:00.485185', 'step': 13107, 'epoch': 2} {'type': 'loss', 'content': 0.15864528715610504, 'timestamp': '2025-10-01 04:35:00.491068', 'step': 13108, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:00.544194', 'step': 13108, 'epoch': 2} {'type': 'loss', 'content': 0.11729516834020615, 'timestamp': '2025-10-01 04:35:00.546371', 'step': 13109, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:00.601157', 'step': 13109, 'epoch': 2} {'type': 'loss', 'content': 0.10278540104627609, 'timestamp': '2025-10-01 04:35:00.603008', 'step': 13110, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:00.657079', 'step': 13110, 'epoch': 2} {'type': 'loss', 'content': 0.09592984616756439, 'timestamp': '2025-10-01 04:35:00.659956', 'step': 13111, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:00.714223', 'step': 13111, 'epoch': 2} {'type': 'loss', 'content': 0.02657569944858551, 'timestamp': '2025-10-01 04:35:00.719940', 'step': 13112, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:00.773171', 'step': 13112, 'epoch': 2} {'type': 'loss', 'content': 0.07740679383277893, 'timestamp': '2025-10-01 04:35:00.775102', 'step': 13113, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:00.828187', 'step': 13113, 'epoch': 2} {'type': 'loss', 'content': 0.14718982577323914, 'timestamp': '2025-10-01 04:35:00.830195', 'step': 13114, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:00.883431', 'step': 13114, 'epoch': 2} {'type': 'loss', 'content': 0.12261167168617249, 'timestamp': '2025-10-01 04:35:00.885545', 'step': 13115, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:00.939587', 'step': 13115, 'epoch': 2} {'type': 'loss', 'content': 0.1499781608581543, 'timestamp': '2025-10-01 04:35:00.945257', 'step': 13116, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:01.003211', 'step': 13116, 'epoch': 2} {'type': 'loss', 'content': 0.08932995796203613, 'timestamp': '2025-10-01 04:35:01.005654', 'step': 13117, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:01.058809', 'step': 13117, 'epoch': 2} {'type': 'loss', 'content': 0.22492340207099915, 'timestamp': '2025-10-01 04:35:01.060906', 'step': 13118, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:01.115218', 'step': 13118, 'epoch': 2} {'type': 'loss', 'content': 0.09161461144685745, 'timestamp': '2025-10-01 04:35:01.117308', 'step': 13119, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:01.171578', 'step': 13119, 'epoch': 2} {'type': 'loss', 'content': 0.08891580998897552, 'timestamp': '2025-10-01 04:35:01.177382', 'step': 13120, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:01.230332', 'step': 13120, 'epoch': 2} {'type': 'loss', 'content': 0.07805483043193817, 'timestamp': '2025-10-01 04:35:01.233253', 'step': 13121, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:01.288550', 'step': 13121, 'epoch': 2} {'type': 'loss', 'content': 0.16627366840839386, 'timestamp': '2025-10-01 04:35:01.290910', 'step': 13122, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:01.345796', 'step': 13122, 'epoch': 2} {'type': 'loss', 'content': 0.09308641403913498, 'timestamp': '2025-10-01 04:35:01.348296', 'step': 13123, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:01.402274', 'step': 13123, 'epoch': 2} {'type': 'loss', 'content': 0.07981101423501968, 'timestamp': '2025-10-01 04:35:01.408370', 'step': 13124, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:01.461679', 'step': 13124, 'epoch': 2} {'type': 'loss', 'content': 0.08099286258220673, 'timestamp': '2025-10-01 04:35:01.464088', 'step': 13125, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:35:01.522875', 'step': 13125, 'epoch': 2} {'type': 'loss', 'content': 0.043484073132276535, 'timestamp': '2025-10-01 04:35:01.525230', 'step': 13126, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:01.579296', 'step': 13126, 'epoch': 2} {'type': 'loss', 'content': 0.08051858097314835, 'timestamp': '2025-10-01 04:35:01.581582', 'step': 13127, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:01.637990', 'step': 13127, 'epoch': 2} {'type': 'loss', 'content': 0.10210660845041275, 'timestamp': '2025-10-01 04:35:01.643886', 'step': 13128, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:35:01.698074', 'step': 13128, 'epoch': 2} {'type': 'loss', 'content': 0.08581627905368805, 'timestamp': '2025-10-01 04:35:01.700781', 'step': 13129, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:01.754534', 'step': 13129, 'epoch': 2} {'type': 'loss', 'content': 0.05813604220747948, 'timestamp': '2025-10-01 04:35:01.757118', 'step': 13130, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:01.812239', 'step': 13130, 'epoch': 2} {'type': 'loss', 'content': 0.1261451244354248, 'timestamp': '2025-10-01 04:35:01.814458', 'step': 13131, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:01.868963', 'step': 13131, 'epoch': 2} {'type': 'loss', 'content': 0.10651566833257675, 'timestamp': '2025-10-01 04:35:01.874982', 'step': 13132, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:35:01.929709', 'step': 13132, 'epoch': 2} {'type': 'loss', 'content': 0.18777143955230713, 'timestamp': '2025-10-01 04:35:01.932151', 'step': 13133, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:01.986863', 'step': 13133, 'epoch': 2} {'type': 'loss', 'content': 0.09409654140472412, 'timestamp': '2025-10-01 04:35:01.992257', 'step': 13134, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:02.046896', 'step': 13134, 'epoch': 2} {'type': 'loss', 'content': 0.11897677928209305, 'timestamp': '2025-10-01 04:35:02.049502', 'step': 13135, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:02.104128', 'step': 13135, 'epoch': 2} {'type': 'loss', 'content': 0.15151141583919525, 'timestamp': '2025-10-01 04:35:02.110503', 'step': 13136, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:02.165001', 'step': 13136, 'epoch': 2} {'type': 'loss', 'content': 0.10631595551967621, 'timestamp': '2025-10-01 04:35:02.167694', 'step': 13137, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:02.222151', 'step': 13137, 'epoch': 2} {'type': 'loss', 'content': 0.06557172536849976, 'timestamp': '2025-10-01 04:35:02.224708', 'step': 13138, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:02.281123', 'step': 13138, 'epoch': 2} {'type': 'loss', 'content': 0.06528263539075851, 'timestamp': '2025-10-01 04:35:02.283160', 'step': 13139, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:02.338930', 'step': 13139, 'epoch': 2} {'type': 'loss', 'content': 0.10123084485530853, 'timestamp': '2025-10-01 04:35:02.344733', 'step': 13140, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:02.400438', 'step': 13140, 'epoch': 2} {'type': 'loss', 'content': 0.1163727417588234, 'timestamp': '2025-10-01 04:35:02.402831', 'step': 13141, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:02.457714', 'step': 13141, 'epoch': 2} {'type': 'loss', 'content': 0.10772859305143356, 'timestamp': '2025-10-01 04:35:02.460010', 'step': 13142, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:35:02.514842', 'step': 13142, 'epoch': 2} {'type': 'loss', 'content': 0.1194600760936737, 'timestamp': '2025-10-01 04:35:02.517206', 'step': 13143, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:02.571735', 'step': 13143, 'epoch': 2} {'type': 'loss', 'content': 0.24532054364681244, 'timestamp': '2025-10-01 04:35:02.577753', 'step': 13144, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:02.631578', 'step': 13144, 'epoch': 2} {'type': 'loss', 'content': 0.1051442101597786, 'timestamp': '2025-10-01 04:35:02.634998', 'step': 13145, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:02.689842', 'step': 13145, 'epoch': 2} {'type': 'loss', 'content': 0.15748047828674316, 'timestamp': '2025-10-01 04:35:02.692185', 'step': 13146, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:02.750701', 'step': 13146, 'epoch': 2} {'type': 'loss', 'content': 0.12169241160154343, 'timestamp': '2025-10-01 04:35:02.752998', 'step': 13147, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:02.806754', 'step': 13147, 'epoch': 2} {'type': 'loss', 'content': 0.19029654562473297, 'timestamp': '2025-10-01 04:35:02.812892', 'step': 13148, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:02.867224', 'step': 13148, 'epoch': 2} {'type': 'loss', 'content': 0.15377195179462433, 'timestamp': '2025-10-01 04:35:02.869305', 'step': 13149, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:02.922935', 'step': 13149, 'epoch': 2} {'type': 'loss', 'content': 0.1683436781167984, 'timestamp': '2025-10-01 04:35:02.925103', 'step': 13150, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:02.978911', 'step': 13150, 'epoch': 2} {'type': 'loss', 'content': 0.12879620492458344, 'timestamp': '2025-10-01 04:35:02.981242', 'step': 13151, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:03.034410', 'step': 13151, 'epoch': 2} {'type': 'loss', 'content': 0.1054091602563858, 'timestamp': '2025-10-01 04:35:03.040472', 'step': 13152, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:35:03.093482', 'step': 13152, 'epoch': 2} {'type': 'loss', 'content': 0.1006079763174057, 'timestamp': '2025-10-01 04:35:03.095974', 'step': 13153, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:03.149544', 'step': 13153, 'epoch': 2} {'type': 'loss', 'content': 0.2177528291940689, 'timestamp': '2025-10-01 04:35:03.151678', 'step': 13154, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:03.205376', 'step': 13154, 'epoch': 2} {'type': 'loss', 'content': 0.13247616589069366, 'timestamp': '2025-10-01 04:35:03.207478', 'step': 13155, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:03.261691', 'step': 13155, 'epoch': 2} {'type': 'loss', 'content': 0.22336940467357635, 'timestamp': '2025-10-01 04:35:03.267975', 'step': 13156, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:03.320631', 'step': 13156, 'epoch': 2} {'type': 'loss', 'content': 0.05892012640833855, 'timestamp': '2025-10-01 04:35:03.324929', 'step': 13157, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:03.378659', 'step': 13157, 'epoch': 2} {'type': 'loss', 'content': 0.1359136551618576, 'timestamp': '2025-10-01 04:35:03.380662', 'step': 13158, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:03.435146', 'step': 13158, 'epoch': 2} {'type': 'loss', 'content': 0.07388066500425339, 'timestamp': '2025-10-01 04:35:03.437461', 'step': 13159, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:03.491949', 'step': 13159, 'epoch': 2} {'type': 'loss', 'content': 0.04948074743151665, 'timestamp': '2025-10-01 04:35:03.497514', 'step': 13160, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:03.551220', 'step': 13160, 'epoch': 2} {'type': 'loss', 'content': 0.15426014363765717, 'timestamp': '2025-10-01 04:35:03.553669', 'step': 13161, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:03.607143', 'step': 13161, 'epoch': 2} {'type': 'loss', 'content': 0.08423888683319092, 'timestamp': '2025-10-01 04:35:03.609352', 'step': 13162, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:03.663737', 'step': 13162, 'epoch': 2} {'type': 'loss', 'content': 0.13080035150051117, 'timestamp': '2025-10-01 04:35:03.665778', 'step': 13163, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:03.719842', 'step': 13163, 'epoch': 2} {'type': 'loss', 'content': 0.07646005600690842, 'timestamp': '2025-10-01 04:35:03.725522', 'step': 13164, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:03.778725', 'step': 13164, 'epoch': 2} {'type': 'loss', 'content': 0.12607046961784363, 'timestamp': '2025-10-01 04:35:03.782813', 'step': 13165, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:03.838168', 'step': 13165, 'epoch': 2} {'type': 'loss', 'content': 0.12388540059328079, 'timestamp': '2025-10-01 04:35:03.840487', 'step': 13166, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:35:03.894707', 'step': 13166, 'epoch': 2} {'type': 'loss', 'content': 0.12735086679458618, 'timestamp': '2025-10-01 04:35:03.896841', 'step': 13167, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:03.952422', 'step': 13167, 'epoch': 2} {'type': 'loss', 'content': 0.09126737713813782, 'timestamp': '2025-10-01 04:35:03.958150', 'step': 13168, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:04.011277', 'step': 13168, 'epoch': 2} {'type': 'loss', 'content': 0.14126160740852356, 'timestamp': '2025-10-01 04:35:04.013599', 'step': 13169, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:04.067167', 'step': 13169, 'epoch': 2} {'type': 'loss', 'content': 0.12378689646720886, 'timestamp': '2025-10-01 04:35:04.069298', 'step': 13170, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:04.122683', 'step': 13170, 'epoch': 2} {'type': 'loss', 'content': 0.10507646948099136, 'timestamp': '2025-10-01 04:35:04.124613', 'step': 13171, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:04.177775', 'step': 13171, 'epoch': 2} {'type': 'loss', 'content': 0.05549311265349388, 'timestamp': '2025-10-01 04:35:04.184377', 'step': 13172, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:04.238109', 'step': 13172, 'epoch': 2} {'type': 'loss', 'content': 0.07507456839084625, 'timestamp': '2025-10-01 04:35:04.240121', 'step': 13173, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:04.293795', 'step': 13173, 'epoch': 2} {'type': 'loss', 'content': 0.13815467059612274, 'timestamp': '2025-10-01 04:35:04.295881', 'step': 13174, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:04.349708', 'step': 13174, 'epoch': 2} {'type': 'loss', 'content': 0.11340157687664032, 'timestamp': '2025-10-01 04:35:04.351798', 'step': 13175, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:04.405292', 'step': 13175, 'epoch': 2} {'type': 'loss', 'content': 0.08183607459068298, 'timestamp': '2025-10-01 04:35:04.410933', 'step': 13176, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:04.464236', 'step': 13176, 'epoch': 2} {'type': 'loss', 'content': 0.14032593369483948, 'timestamp': '2025-10-01 04:35:04.466070', 'step': 13177, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:04.519185', 'step': 13177, 'epoch': 2} {'type': 'loss', 'content': 0.10059324651956558, 'timestamp': '2025-10-01 04:35:04.521288', 'step': 13178, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:04.574731', 'step': 13178, 'epoch': 2} {'type': 'loss', 'content': 0.07805857807397842, 'timestamp': '2025-10-01 04:35:04.577023', 'step': 13179, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:04.631335', 'step': 13179, 'epoch': 2} {'type': 'loss', 'content': 0.18396763503551483, 'timestamp': '2025-10-01 04:35:04.637061', 'step': 13180, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:04.691172', 'step': 13180, 'epoch': 2} {'type': 'loss', 'content': 0.07700660824775696, 'timestamp': '2025-10-01 04:35:04.693029', 'step': 13181, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:04.746141', 'step': 13181, 'epoch': 2} {'type': 'loss', 'content': 0.11576838791370392, 'timestamp': '2025-10-01 04:35:04.748248', 'step': 13182, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:04.803344', 'step': 13182, 'epoch': 2} {'type': 'loss', 'content': 0.13830558955669403, 'timestamp': '2025-10-01 04:35:04.805289', 'step': 13183, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:04.860198', 'step': 13183, 'epoch': 2} {'type': 'loss', 'content': 0.06897078454494476, 'timestamp': '2025-10-01 04:35:04.865713', 'step': 13184, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:04.920584', 'step': 13184, 'epoch': 2} {'type': 'loss', 'content': 0.06971610337495804, 'timestamp': '2025-10-01 04:35:04.923071', 'step': 13185, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:04.976392', 'step': 13185, 'epoch': 2} {'type': 'loss', 'content': 0.17491675913333893, 'timestamp': '2025-10-01 04:35:04.978498', 'step': 13186, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:05.031983', 'step': 13186, 'epoch': 2} {'type': 'loss', 'content': 0.08344271034002304, 'timestamp': '2025-10-01 04:35:05.034173', 'step': 13187, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:05.088960', 'step': 13187, 'epoch': 2} {'type': 'loss', 'content': 0.07200930267572403, 'timestamp': '2025-10-01 04:35:05.097309', 'step': 13188, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:05.150597', 'step': 13188, 'epoch': 2} {'type': 'loss', 'content': 0.10473761707544327, 'timestamp': '2025-10-01 04:35:05.152575', 'step': 13189, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:05.206103', 'step': 13189, 'epoch': 2} {'type': 'loss', 'content': 0.13834291696548462, 'timestamp': '2025-10-01 04:35:05.208251', 'step': 13190, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:05.262087', 'step': 13190, 'epoch': 2} {'type': 'loss', 'content': 0.059769485145807266, 'timestamp': '2025-10-01 04:35:05.264043', 'step': 13191, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:05.317800', 'step': 13191, 'epoch': 2} {'type': 'loss', 'content': 0.07487726956605911, 'timestamp': '2025-10-01 04:35:05.323379', 'step': 13192, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:05.377946', 'step': 13192, 'epoch': 2} {'type': 'loss', 'content': 0.10695680975914001, 'timestamp': '2025-10-01 04:35:05.379977', 'step': 13193, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:05.435696', 'step': 13193, 'epoch': 2} {'type': 'loss', 'content': 0.0922667607665062, 'timestamp': '2025-10-01 04:35:05.438187', 'step': 13194, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:05.493172', 'step': 13194, 'epoch': 2} {'type': 'loss', 'content': 0.08876583725214005, 'timestamp': '2025-10-01 04:35:05.495325', 'step': 13195, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:05.550437', 'step': 13195, 'epoch': 2} {'type': 'loss', 'content': 0.13210269808769226, 'timestamp': '2025-10-01 04:35:05.556193', 'step': 13196, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:05.611742', 'step': 13196, 'epoch': 2} {'type': 'loss', 'content': 0.11780095845460892, 'timestamp': '2025-10-01 04:35:05.613588', 'step': 13197, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:05.666708', 'step': 13197, 'epoch': 2} {'type': 'loss', 'content': 0.08284039795398712, 'timestamp': '2025-10-01 04:35:05.668794', 'step': 13198, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:05.722869', 'step': 13198, 'epoch': 2} {'type': 'loss', 'content': 0.11911872774362564, 'timestamp': '2025-10-01 04:35:05.725384', 'step': 13199, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:05.794997', 'step': 13199, 'epoch': 2} {'type': 'loss', 'content': 0.11918099969625473, 'timestamp': '2025-10-01 04:35:05.800556', 'step': 13200, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:05.853999', 'step': 13200, 'epoch': 2} {'type': 'loss', 'content': 0.08994182199239731, 'timestamp': '2025-10-01 04:35:05.967838', 'step': 13201, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:35:06.024753', 'step': 13201, 'epoch': 2} {'type': 'loss', 'content': 0.11594364792108536, 'timestamp': '2025-10-01 04:35:06.027309', 'step': 13202, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:06.081399', 'step': 13202, 'epoch': 2} {'type': 'loss', 'content': 0.10392161458730698, 'timestamp': '2025-10-01 04:35:06.083692', 'step': 13203, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:06.137622', 'step': 13203, 'epoch': 2} {'type': 'loss', 'content': 0.09588521718978882, 'timestamp': '2025-10-01 04:35:06.143384', 'step': 13204, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:06.197810', 'step': 13204, 'epoch': 2} {'type': 'loss', 'content': 0.08215208351612091, 'timestamp': '2025-10-01 04:35:06.200291', 'step': 13205, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:06.256013', 'step': 13205, 'epoch': 2} {'type': 'loss', 'content': 0.16931608319282532, 'timestamp': '2025-10-01 04:35:06.258515', 'step': 13206, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:06.311802', 'step': 13206, 'epoch': 2} {'type': 'loss', 'content': 0.09157732874155045, 'timestamp': '2025-10-01 04:35:06.313908', 'step': 13207, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:06.367838', 'step': 13207, 'epoch': 2} {'type': 'loss', 'content': 0.09131895005702972, 'timestamp': '2025-10-01 04:35:06.373806', 'step': 13208, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:06.427735', 'step': 13208, 'epoch': 2} {'type': 'loss', 'content': 0.14030322432518005, 'timestamp': '2025-10-01 04:35:06.430036', 'step': 13209, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:06.483822', 'step': 13209, 'epoch': 2} {'type': 'loss', 'content': 0.12050822377204895, 'timestamp': '2025-10-01 04:35:06.485993', 'step': 13210, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:06.539420', 'step': 13210, 'epoch': 2} {'type': 'loss', 'content': 0.12041033804416656, 'timestamp': '2025-10-01 04:35:06.541632', 'step': 13211, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:06.595244', 'step': 13211, 'epoch': 2} {'type': 'loss', 'content': 0.05068809539079666, 'timestamp': '2025-10-01 04:35:06.601070', 'step': 13212, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:06.659805', 'step': 13212, 'epoch': 2} {'type': 'loss', 'content': 0.06344398856163025, 'timestamp': '2025-10-01 04:35:06.661872', 'step': 13213, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:06.715937', 'step': 13213, 'epoch': 2} {'type': 'loss', 'content': 0.07867272198200226, 'timestamp': '2025-10-01 04:35:06.718068', 'step': 13214, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:06.771522', 'step': 13214, 'epoch': 2} {'type': 'loss', 'content': 0.10610654205083847, 'timestamp': '2025-10-01 04:35:06.773599', 'step': 13215, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:06.827142', 'step': 13215, 'epoch': 2} {'type': 'loss', 'content': 0.09675026684999466, 'timestamp': '2025-10-01 04:35:06.832833', 'step': 13216, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:06.886347', 'step': 13216, 'epoch': 2} {'type': 'loss', 'content': 0.13348649442195892, 'timestamp': '2025-10-01 04:35:06.888558', 'step': 13217, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:06.941828', 'step': 13217, 'epoch': 2} {'type': 'loss', 'content': 0.16842719912528992, 'timestamp': '2025-10-01 04:35:06.943949', 'step': 13218, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:06.998301', 'step': 13218, 'epoch': 2} {'type': 'loss', 'content': 0.09283003211021423, 'timestamp': '2025-10-01 04:35:07.000788', 'step': 13219, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:07.055231', 'step': 13219, 'epoch': 2} {'type': 'loss', 'content': 0.07641734182834625, 'timestamp': '2025-10-01 04:35:07.060938', 'step': 13220, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:07.113753', 'step': 13220, 'epoch': 2} {'type': 'loss', 'content': 0.11513511836528778, 'timestamp': '2025-10-01 04:35:07.115898', 'step': 13221, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:07.169381', 'step': 13221, 'epoch': 2} {'type': 'loss', 'content': 0.1099482923746109, 'timestamp': '2025-10-01 04:35:07.171584', 'step': 13222, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:07.225834', 'step': 13222, 'epoch': 2} {'type': 'loss', 'content': 0.05052197724580765, 'timestamp': '2025-10-01 04:35:07.228053', 'step': 13223, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:07.282006', 'step': 13223, 'epoch': 2} {'type': 'loss', 'content': 0.08950015902519226, 'timestamp': '2025-10-01 04:35:07.287971', 'step': 13224, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:07.341110', 'step': 13224, 'epoch': 2} {'type': 'loss', 'content': 0.25534096360206604, 'timestamp': '2025-10-01 04:35:07.343324', 'step': 13225, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:35:07.397340', 'step': 13225, 'epoch': 2} {'type': 'loss', 'content': 0.13584914803504944, 'timestamp': '2025-10-01 04:35:07.399444', 'step': 13226, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:07.453443', 'step': 13226, 'epoch': 2} {'type': 'loss', 'content': 0.13127517700195312, 'timestamp': '2025-10-01 04:35:07.455929', 'step': 13227, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:07.509737', 'step': 13227, 'epoch': 2} {'type': 'loss', 'content': 0.14879722893238068, 'timestamp': '2025-10-01 04:35:07.515406', 'step': 13228, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:07.569699', 'step': 13228, 'epoch': 2} {'type': 'loss', 'content': 0.11845381557941437, 'timestamp': '2025-10-01 04:35:07.571903', 'step': 13229, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:35:07.626494', 'step': 13229, 'epoch': 2} {'type': 'loss', 'content': 0.07893465459346771, 'timestamp': '2025-10-01 04:35:07.628585', 'step': 13230, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:07.682299', 'step': 13230, 'epoch': 2} {'type': 'loss', 'content': 0.2088562399148941, 'timestamp': '2025-10-01 04:35:07.684356', 'step': 13231, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:07.738242', 'step': 13231, 'epoch': 2} {'type': 'loss', 'content': 0.2538343667984009, 'timestamp': '2025-10-01 04:35:07.744032', 'step': 13232, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:07.797453', 'step': 13232, 'epoch': 2} {'type': 'loss', 'content': 0.13562621176242828, 'timestamp': '2025-10-01 04:35:07.800281', 'step': 13233, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:07.854527', 'step': 13233, 'epoch': 2} {'type': 'loss', 'content': 0.07544761896133423, 'timestamp': '2025-10-01 04:35:07.856727', 'step': 13234, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:07.910413', 'step': 13234, 'epoch': 2} {'type': 'loss', 'content': 0.14714069664478302, 'timestamp': '2025-10-01 04:35:07.912777', 'step': 13235, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:07.967128', 'step': 13235, 'epoch': 2} {'type': 'loss', 'content': 0.07887421548366547, 'timestamp': '2025-10-01 04:35:07.972806', 'step': 13236, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:08.025843', 'step': 13236, 'epoch': 2} {'type': 'loss', 'content': 0.08589287847280502, 'timestamp': '2025-10-01 04:35:08.028261', 'step': 13237, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:08.081622', 'step': 13237, 'epoch': 2} {'type': 'loss', 'content': 0.11204259842634201, 'timestamp': '2025-10-01 04:35:08.083859', 'step': 13238, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:08.137781', 'step': 13238, 'epoch': 2} {'type': 'loss', 'content': 0.18020091950893402, 'timestamp': '2025-10-01 04:35:08.139878', 'step': 13239, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:08.193443', 'step': 13239, 'epoch': 2} {'type': 'loss', 'content': 0.16513322293758392, 'timestamp': '2025-10-01 04:35:08.199320', 'step': 13240, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:08.252706', 'step': 13240, 'epoch': 2} {'type': 'loss', 'content': 0.133662149310112, 'timestamp': '2025-10-01 04:35:08.254910', 'step': 13241, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:08.308883', 'step': 13241, 'epoch': 2} {'type': 'loss', 'content': 0.0791032537817955, 'timestamp': '2025-10-01 04:35:08.311595', 'step': 13242, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:08.365935', 'step': 13242, 'epoch': 2} {'type': 'loss', 'content': 0.16698794066905975, 'timestamp': '2025-10-01 04:35:08.368148', 'step': 13243, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:08.422019', 'step': 13243, 'epoch': 2} {'type': 'loss', 'content': 0.09819135069847107, 'timestamp': '2025-10-01 04:35:08.427704', 'step': 13244, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:08.480626', 'step': 13244, 'epoch': 2} {'type': 'loss', 'content': 0.17708781361579895, 'timestamp': '2025-10-01 04:35:08.482710', 'step': 13245, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:08.536827', 'step': 13245, 'epoch': 2} {'type': 'loss', 'content': 0.185957670211792, 'timestamp': '2025-10-01 04:35:08.538963', 'step': 13246, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:08.593902', 'step': 13246, 'epoch': 2} {'type': 'loss', 'content': 0.15293753147125244, 'timestamp': '2025-10-01 04:35:08.595835', 'step': 13247, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:08.649173', 'step': 13247, 'epoch': 2} {'type': 'loss', 'content': 0.08288799971342087, 'timestamp': '2025-10-01 04:35:08.655252', 'step': 13248, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:08.709502', 'step': 13248, 'epoch': 2} {'type': 'loss', 'content': 0.13040360808372498, 'timestamp': '2025-10-01 04:35:08.711664', 'step': 13249, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:08.765572', 'step': 13249, 'epoch': 2} {'type': 'loss', 'content': 0.16169922053813934, 'timestamp': '2025-10-01 04:35:08.767669', 'step': 13250, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:08.841219', 'step': 13250, 'epoch': 2} {'type': 'loss', 'content': 0.1277925819158554, 'timestamp': '2025-10-01 04:35:08.855423', 'step': 13251, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:08.928565', 'step': 13251, 'epoch': 2} {'type': 'loss', 'content': 0.10144896060228348, 'timestamp': '2025-10-01 04:35:08.940860', 'step': 13252, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:09.017987', 'step': 13252, 'epoch': 2} {'type': 'loss', 'content': 0.11332274228334427, 'timestamp': '2025-10-01 04:35:09.023674', 'step': 13253, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:09.205523', 'step': 13253, 'epoch': 2} {'type': 'loss', 'content': 0.13020573556423187, 'timestamp': '2025-10-01 04:35:09.209604', 'step': 13254, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:09.286246', 'step': 13254, 'epoch': 2} {'type': 'loss', 'content': 0.12446833401918411, 'timestamp': '2025-10-01 04:35:09.294816', 'step': 13255, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:09.392618', 'step': 13255, 'epoch': 2} {'type': 'loss', 'content': 0.16346120834350586, 'timestamp': '2025-10-01 04:35:09.398873', 'step': 13256, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:09.464544', 'step': 13256, 'epoch': 2} {'type': 'loss', 'content': 0.121980682015419, 'timestamp': '2025-10-01 04:35:09.467089', 'step': 13257, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:09.521818', 'step': 13257, 'epoch': 2} {'type': 'loss', 'content': 0.04741905629634857, 'timestamp': '2025-10-01 04:35:09.524883', 'step': 13258, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:09.580169', 'step': 13258, 'epoch': 2} {'type': 'loss', 'content': 0.08241300284862518, 'timestamp': '2025-10-01 04:35:09.582432', 'step': 13259, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:09.637458', 'step': 13259, 'epoch': 2} {'type': 'loss', 'content': 0.06592310965061188, 'timestamp': '2025-10-01 04:35:09.643752', 'step': 13260, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:09.698232', 'step': 13260, 'epoch': 2} {'type': 'loss', 'content': 0.22142337262630463, 'timestamp': '2025-10-01 04:35:09.701036', 'step': 13261, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:09.761903', 'step': 13261, 'epoch': 2} {'type': 'loss', 'content': 0.16994082927703857, 'timestamp': '2025-10-01 04:35:09.764265', 'step': 13262, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:09.819019', 'step': 13262, 'epoch': 2} {'type': 'loss', 'content': 0.027012722566723824, 'timestamp': '2025-10-01 04:35:09.821406', 'step': 13263, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:09.875591', 'step': 13263, 'epoch': 2} {'type': 'loss', 'content': 0.09222879260778427, 'timestamp': '2025-10-01 04:35:09.894578', 'step': 13264, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:09.971565', 'step': 13264, 'epoch': 2} {'type': 'loss', 'content': 0.05464811995625496, 'timestamp': '2025-10-01 04:35:09.987918', 'step': 13265, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:10.059852', 'step': 13265, 'epoch': 2} {'type': 'loss', 'content': 0.16652852296829224, 'timestamp': '2025-10-01 04:35:10.063004', 'step': 13266, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:10.144489', 'step': 13266, 'epoch': 2} {'type': 'loss', 'content': 0.11241132766008377, 'timestamp': '2025-10-01 04:35:10.178572', 'step': 13267, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:10.269931', 'step': 13267, 'epoch': 2} {'type': 'loss', 'content': 0.13224396109580994, 'timestamp': '2025-10-01 04:35:10.279867', 'step': 13268, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:10.341355', 'step': 13268, 'epoch': 2} {'type': 'loss', 'content': 0.11437184363603592, 'timestamp': '2025-10-01 04:35:10.350777', 'step': 13269, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:10.432614', 'step': 13269, 'epoch': 2} {'type': 'loss', 'content': 0.06676747649908066, 'timestamp': '2025-10-01 04:35:10.441467', 'step': 13270, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:10.536571', 'step': 13270, 'epoch': 2} {'type': 'loss', 'content': 0.050158917903900146, 'timestamp': '2025-10-01 04:35:10.538513', 'step': 13271, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:35:10.593259', 'step': 13271, 'epoch': 2} {'type': 'loss', 'content': 0.08458483964204788, 'timestamp': '2025-10-01 04:35:10.600143', 'step': 13272, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:10.654813', 'step': 13272, 'epoch': 2} {'type': 'loss', 'content': 0.16282498836517334, 'timestamp': '2025-10-01 04:35:10.657106', 'step': 13273, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:10.713053', 'step': 13273, 'epoch': 2} {'type': 'loss', 'content': 0.11762747913599014, 'timestamp': '2025-10-01 04:35:10.715343', 'step': 13274, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:10.778974', 'step': 13274, 'epoch': 2} {'type': 'loss', 'content': 0.12841244041919708, 'timestamp': '2025-10-01 04:35:10.781912', 'step': 13275, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:10.836615', 'step': 13275, 'epoch': 2} {'type': 'loss', 'content': 0.1388978660106659, 'timestamp': '2025-10-01 04:35:10.843074', 'step': 13276, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:10.897345', 'step': 13276, 'epoch': 2} {'type': 'loss', 'content': 0.07321558147668839, 'timestamp': '2025-10-01 04:35:10.899774', 'step': 13277, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:10.954170', 'step': 13277, 'epoch': 2} {'type': 'loss', 'content': 0.12448492646217346, 'timestamp': '2025-10-01 04:35:10.956381', 'step': 13278, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:11.010147', 'step': 13278, 'epoch': 2} {'type': 'loss', 'content': 0.028871729969978333, 'timestamp': '2025-10-01 04:35:11.012456', 'step': 13279, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:11.068385', 'step': 13279, 'epoch': 2} {'type': 'loss', 'content': 0.17037589848041534, 'timestamp': '2025-10-01 04:35:11.074481', 'step': 13280, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:11.128700', 'step': 13280, 'epoch': 2} {'type': 'loss', 'content': 0.11325648427009583, 'timestamp': '2025-10-01 04:35:11.130835', 'step': 13281, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:11.190879', 'step': 13281, 'epoch': 2} {'type': 'loss', 'content': 0.21878497302532196, 'timestamp': '2025-10-01 04:35:11.192920', 'step': 13282, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:11.247422', 'step': 13282, 'epoch': 2} {'type': 'loss', 'content': 0.1169593334197998, 'timestamp': '2025-10-01 04:35:11.249553', 'step': 13283, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:11.303961', 'step': 13283, 'epoch': 2} {'type': 'loss', 'content': 0.0870334804058075, 'timestamp': '2025-10-01 04:35:11.310260', 'step': 13284, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:11.366165', 'step': 13284, 'epoch': 2} {'type': 'loss', 'content': 0.09843979775905609, 'timestamp': '2025-10-01 04:35:11.368309', 'step': 13285, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:11.422630', 'step': 13285, 'epoch': 2} {'type': 'loss', 'content': 0.1732119768857956, 'timestamp': '2025-10-01 04:35:11.424829', 'step': 13286, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:11.478898', 'step': 13286, 'epoch': 2} {'type': 'loss', 'content': 0.10611335188150406, 'timestamp': '2025-10-01 04:35:11.481234', 'step': 13287, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:11.544917', 'step': 13287, 'epoch': 2} {'type': 'loss', 'content': 0.06522167474031448, 'timestamp': '2025-10-01 04:35:11.550733', 'step': 13288, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:11.604361', 'step': 13288, 'epoch': 2} {'type': 'loss', 'content': 0.09984148293733597, 'timestamp': '2025-10-01 04:35:11.606400', 'step': 13289, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:11.659874', 'step': 13289, 'epoch': 2} {'type': 'loss', 'content': 0.12161706387996674, 'timestamp': '2025-10-01 04:35:11.661941', 'step': 13290, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:11.715627', 'step': 13290, 'epoch': 2} {'type': 'loss', 'content': 0.17247965931892395, 'timestamp': '2025-10-01 04:35:11.717768', 'step': 13291, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:11.771793', 'step': 13291, 'epoch': 2} {'type': 'loss', 'content': 0.09120171517133713, 'timestamp': '2025-10-01 04:35:11.777533', 'step': 13292, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:35:11.832848', 'step': 13292, 'epoch': 2} {'type': 'loss', 'content': 0.14991475641727448, 'timestamp': '2025-10-01 04:35:11.834962', 'step': 13293, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:11.894627', 'step': 13293, 'epoch': 2} {'type': 'loss', 'content': 0.11292467266321182, 'timestamp': '2025-10-01 04:35:11.896837', 'step': 13294, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:11.951364', 'step': 13294, 'epoch': 2} {'type': 'loss', 'content': 0.03893563896417618, 'timestamp': '2025-10-01 04:35:11.953708', 'step': 13295, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:12.007545', 'step': 13295, 'epoch': 2} {'type': 'loss', 'content': 0.1476263850927353, 'timestamp': '2025-10-01 04:35:12.020502', 'step': 13296, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:12.078208', 'step': 13296, 'epoch': 2} {'type': 'loss', 'content': 0.09667523950338364, 'timestamp': '2025-10-01 04:35:12.080371', 'step': 13297, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:12.134043', 'step': 13297, 'epoch': 2} {'type': 'loss', 'content': 0.09529762715101242, 'timestamp': '2025-10-01 04:35:12.136245', 'step': 13298, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:12.192412', 'step': 13298, 'epoch': 2} {'type': 'loss', 'content': 0.100882388651371, 'timestamp': '2025-10-01 04:35:12.195769', 'step': 13299, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:35:12.255977', 'step': 13299, 'epoch': 2} {'type': 'loss', 'content': 0.11247687041759491, 'timestamp': '2025-10-01 04:35:12.261806', 'step': 13300, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:12.315595', 'step': 13300, 'epoch': 2} {'type': 'loss', 'content': 0.09536086767911911, 'timestamp': '2025-10-01 04:35:12.318355', 'step': 13301, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:12.374102', 'step': 13301, 'epoch': 2} {'type': 'loss', 'content': 0.15798884630203247, 'timestamp': '2025-10-01 04:35:12.378973', 'step': 13302, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:12.436693', 'step': 13302, 'epoch': 2} {'type': 'loss', 'content': 0.08136015385389328, 'timestamp': '2025-10-01 04:35:12.441203', 'step': 13303, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:12.497787', 'step': 13303, 'epoch': 2} {'type': 'loss', 'content': 0.09389079362154007, 'timestamp': '2025-10-01 04:35:12.503789', 'step': 13304, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:12.556864', 'step': 13304, 'epoch': 2} {'type': 'loss', 'content': 0.11606767028570175, 'timestamp': '2025-10-01 04:35:12.559156', 'step': 13305, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:35:12.615145', 'step': 13305, 'epoch': 2} {'type': 'loss', 'content': 0.13394765555858612, 'timestamp': '2025-10-01 04:35:12.617264', 'step': 13306, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:35:12.677919', 'step': 13306, 'epoch': 2} {'type': 'loss', 'content': 0.1575326919555664, 'timestamp': '2025-10-01 04:35:12.681092', 'step': 13307, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:12.744396', 'step': 13307, 'epoch': 2} {'type': 'loss', 'content': 0.12320441007614136, 'timestamp': '2025-10-01 04:35:12.751861', 'step': 13308, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:12.811727', 'step': 13308, 'epoch': 2} {'type': 'loss', 'content': 0.1090400293469429, 'timestamp': '2025-10-01 04:35:12.814268', 'step': 13309, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:12.873823', 'step': 13309, 'epoch': 2} {'type': 'loss', 'content': 0.20384801924228668, 'timestamp': '2025-10-01 04:35:12.876183', 'step': 13310, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:12.936297', 'step': 13310, 'epoch': 2} {'type': 'loss', 'content': 0.13774549961090088, 'timestamp': '2025-10-01 04:35:12.938830', 'step': 13311, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:12.999950', 'step': 13311, 'epoch': 2} {'type': 'loss', 'content': 0.10387377440929413, 'timestamp': '2025-10-01 04:35:13.006873', 'step': 13312, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:13.064611', 'step': 13312, 'epoch': 2} {'type': 'loss', 'content': 0.16095910966396332, 'timestamp': '2025-10-01 04:35:13.066852', 'step': 13313, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:13.123155', 'step': 13313, 'epoch': 2} {'type': 'loss', 'content': 0.09799738228321075, 'timestamp': '2025-10-01 04:35:13.125415', 'step': 13314, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:13.183758', 'step': 13314, 'epoch': 2} {'type': 'loss', 'content': 0.14191864430904388, 'timestamp': '2025-10-01 04:35:13.186177', 'step': 13315, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:13.244743', 'step': 13315, 'epoch': 2} {'type': 'loss', 'content': 0.09970029443502426, 'timestamp': '2025-10-01 04:35:13.251915', 'step': 13316, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:13.311204', 'step': 13316, 'epoch': 2} {'type': 'loss', 'content': 0.153427392244339, 'timestamp': '2025-10-01 04:35:13.313800', 'step': 13317, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:13.371145', 'step': 13317, 'epoch': 2} {'type': 'loss', 'content': 0.11942770332098007, 'timestamp': '2025-10-01 04:35:13.373318', 'step': 13318, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:13.430159', 'step': 13318, 'epoch': 2} {'type': 'loss', 'content': 0.12692657113075256, 'timestamp': '2025-10-01 04:35:13.432248', 'step': 13319, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:13.485939', 'step': 13319, 'epoch': 2} {'type': 'loss', 'content': 0.2059304565191269, 'timestamp': '2025-10-01 04:35:13.492350', 'step': 13320, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:13.545814', 'step': 13320, 'epoch': 2} {'type': 'loss', 'content': 0.11671153455972672, 'timestamp': '2025-10-01 04:35:13.548017', 'step': 13321, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:13.602398', 'step': 13321, 'epoch': 2} {'type': 'loss', 'content': 0.10644101351499557, 'timestamp': '2025-10-01 04:35:13.604502', 'step': 13322, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:13.658433', 'step': 13322, 'epoch': 2} {'type': 'loss', 'content': 0.10982199758291245, 'timestamp': '2025-10-01 04:35:13.660704', 'step': 13323, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:13.713984', 'step': 13323, 'epoch': 2} {'type': 'loss', 'content': 0.07551304996013641, 'timestamp': '2025-10-01 04:35:13.719945', 'step': 13324, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:13.772876', 'step': 13324, 'epoch': 2} {'type': 'loss', 'content': 0.11917237937450409, 'timestamp': '2025-10-01 04:35:13.776269', 'step': 13325, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:13.829794', 'step': 13325, 'epoch': 2} {'type': 'loss', 'content': 0.12228146195411682, 'timestamp': '2025-10-01 04:35:13.832900', 'step': 13326, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:13.886715', 'step': 13326, 'epoch': 2} {'type': 'loss', 'content': 0.13568885624408722, 'timestamp': '2025-10-01 04:35:13.888899', 'step': 13327, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:13.942202', 'step': 13327, 'epoch': 2} {'type': 'loss', 'content': 0.10488259792327881, 'timestamp': '2025-10-01 04:35:13.948079', 'step': 13328, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:14.001929', 'step': 13328, 'epoch': 2} {'type': 'loss', 'content': 0.11427980661392212, 'timestamp': '2025-10-01 04:35:14.004059', 'step': 13329, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:14.058828', 'step': 13329, 'epoch': 2} {'type': 'loss', 'content': 0.18324437737464905, 'timestamp': '2025-10-01 04:35:14.060981', 'step': 13330, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:14.115171', 'step': 13330, 'epoch': 2} {'type': 'loss', 'content': 0.10457838326692581, 'timestamp': '2025-10-01 04:35:14.117597', 'step': 13331, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:14.171738', 'step': 13331, 'epoch': 2} {'type': 'loss', 'content': 0.08185584098100662, 'timestamp': '2025-10-01 04:35:14.177950', 'step': 13332, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:14.232039', 'step': 13332, 'epoch': 2} {'type': 'loss', 'content': 0.1303962767124176, 'timestamp': '2025-10-01 04:35:14.234184', 'step': 13333, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:14.288183', 'step': 13333, 'epoch': 2} {'type': 'loss', 'content': 0.02501184679567814, 'timestamp': '2025-10-01 04:35:14.290909', 'step': 13334, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:14.353846', 'step': 13334, 'epoch': 2} {'type': 'loss', 'content': 0.10590307414531708, 'timestamp': '2025-10-01 04:35:14.362594', 'step': 13335, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:14.416056', 'step': 13335, 'epoch': 2} {'type': 'loss', 'content': 0.08544334769248962, 'timestamp': '2025-10-01 04:35:14.422784', 'step': 13336, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:14.475566', 'step': 13336, 'epoch': 2} {'type': 'loss', 'content': 0.06804033368825912, 'timestamp': '2025-10-01 04:35:14.477740', 'step': 13337, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:14.532381', 'step': 13337, 'epoch': 2} {'type': 'loss', 'content': 0.07475347071886063, 'timestamp': '2025-10-01 04:35:14.534935', 'step': 13338, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:14.588710', 'step': 13338, 'epoch': 2} {'type': 'loss', 'content': 0.14444278180599213, 'timestamp': '2025-10-01 04:35:14.590906', 'step': 13339, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:14.645132', 'step': 13339, 'epoch': 2} {'type': 'loss', 'content': 0.11566245555877686, 'timestamp': '2025-10-01 04:35:14.651297', 'step': 13340, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:14.705102', 'step': 13340, 'epoch': 2} {'type': 'loss', 'content': 0.1853782832622528, 'timestamp': '2025-10-01 04:35:14.707191', 'step': 13341, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:14.761094', 'step': 13341, 'epoch': 2} {'type': 'loss', 'content': 0.11219655722379684, 'timestamp': '2025-10-01 04:35:14.763251', 'step': 13342, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:35:14.816863', 'step': 13342, 'epoch': 2} {'type': 'loss', 'content': 0.141121506690979, 'timestamp': '2025-10-01 04:35:14.819341', 'step': 13343, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:14.873110', 'step': 13343, 'epoch': 2} {'type': 'loss', 'content': 0.15079861879348755, 'timestamp': '2025-10-01 04:35:14.878827', 'step': 13344, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:14.932600', 'step': 13344, 'epoch': 2} {'type': 'loss', 'content': 0.04934856668114662, 'timestamp': '2025-10-01 04:35:14.934699', 'step': 13345, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:14.988466', 'step': 13345, 'epoch': 2} {'type': 'loss', 'content': 0.17814062535762787, 'timestamp': '2025-10-01 04:35:14.990727', 'step': 13346, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:15.044557', 'step': 13346, 'epoch': 2} {'type': 'loss', 'content': 0.08293309062719345, 'timestamp': '2025-10-01 04:35:15.046765', 'step': 13347, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:15.103030', 'step': 13347, 'epoch': 2} {'type': 'loss', 'content': 0.09687462449073792, 'timestamp': '2025-10-01 04:35:15.109381', 'step': 13348, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:15.163075', 'step': 13348, 'epoch': 2} {'type': 'loss', 'content': 0.2617763876914978, 'timestamp': '2025-10-01 04:35:15.165158', 'step': 13349, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:15.218768', 'step': 13349, 'epoch': 2} {'type': 'loss', 'content': 0.08481386303901672, 'timestamp': '2025-10-01 04:35:15.220888', 'step': 13350, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:15.274032', 'step': 13350, 'epoch': 2} {'type': 'loss', 'content': 0.15192434191703796, 'timestamp': '2025-10-01 04:35:15.276265', 'step': 13351, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:15.329487', 'step': 13351, 'epoch': 2} {'type': 'loss', 'content': 0.06641079485416412, 'timestamp': '2025-10-01 04:35:15.336101', 'step': 13352, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:15.391549', 'step': 13352, 'epoch': 2} {'type': 'loss', 'content': 0.07378322631120682, 'timestamp': '2025-10-01 04:35:15.393901', 'step': 13353, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:15.447270', 'step': 13353, 'epoch': 2} {'type': 'loss', 'content': 0.0913170725107193, 'timestamp': '2025-10-01 04:35:15.449465', 'step': 13354, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:15.503231', 'step': 13354, 'epoch': 2} {'type': 'loss', 'content': 0.16398392617702484, 'timestamp': '2025-10-01 04:35:15.505218', 'step': 13355, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:35:15.559249', 'step': 13355, 'epoch': 2} {'type': 'loss', 'content': 0.1328887939453125, 'timestamp': '2025-10-01 04:35:15.565534', 'step': 13356, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:15.617972', 'step': 13356, 'epoch': 2} {'type': 'loss', 'content': 0.15865741670131683, 'timestamp': '2025-10-01 04:35:15.625529', 'step': 13357, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:15.680748', 'step': 13357, 'epoch': 2} {'type': 'loss', 'content': 0.1105744019150734, 'timestamp': '2025-10-01 04:35:15.682805', 'step': 13358, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:35:15.736517', 'step': 13358, 'epoch': 2} {'type': 'loss', 'content': 0.04669833555817604, 'timestamp': '2025-10-01 04:35:15.738643', 'step': 13359, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:15.797996', 'step': 13359, 'epoch': 2} {'type': 'loss', 'content': 0.16111639142036438, 'timestamp': '2025-10-01 04:35:15.803836', 'step': 13360, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:15.856732', 'step': 13360, 'epoch': 2} {'type': 'loss', 'content': 0.10776814818382263, 'timestamp': '2025-10-01 04:35:15.858678', 'step': 13361, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:15.911472', 'step': 13361, 'epoch': 2} {'type': 'loss', 'content': 0.06867435574531555, 'timestamp': '2025-10-01 04:35:15.914113', 'step': 13362, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:15.966963', 'step': 13362, 'epoch': 2} {'type': 'loss', 'content': 0.07974095642566681, 'timestamp': '2025-10-01 04:35:15.969178', 'step': 13363, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:35:16.023753', 'step': 13363, 'epoch': 2} {'type': 'loss', 'content': 0.21297600865364075, 'timestamp': '2025-10-01 04:35:16.029648', 'step': 13364, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:16.083158', 'step': 13364, 'epoch': 2} {'type': 'loss', 'content': 0.15207712352275848, 'timestamp': '2025-10-01 04:35:16.085360', 'step': 13365, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:16.139135', 'step': 13365, 'epoch': 2} {'type': 'loss', 'content': 0.16498175263404846, 'timestamp': '2025-10-01 04:35:16.141614', 'step': 13366, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:16.194756', 'step': 13366, 'epoch': 2} {'type': 'loss', 'content': 0.14352917671203613, 'timestamp': '2025-10-01 04:35:16.197075', 'step': 13367, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:16.250828', 'step': 13367, 'epoch': 2} {'type': 'loss', 'content': 0.17420192062854767, 'timestamp': '2025-10-01 04:35:16.256505', 'step': 13368, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:16.309854', 'step': 13368, 'epoch': 2} {'type': 'loss', 'content': 0.10772455483675003, 'timestamp': '2025-10-01 04:35:16.312428', 'step': 13369, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:16.366300', 'step': 13369, 'epoch': 2} {'type': 'loss', 'content': 0.11642127484083176, 'timestamp': '2025-10-01 04:35:16.368277', 'step': 13370, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:16.423648', 'step': 13370, 'epoch': 2} {'type': 'loss', 'content': 0.12356103211641312, 'timestamp': '2025-10-01 04:35:16.425905', 'step': 13371, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:16.481585', 'step': 13371, 'epoch': 2} {'type': 'loss', 'content': 0.11791151762008667, 'timestamp': '2025-10-01 04:35:16.487810', 'step': 13372, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:16.541702', 'step': 13372, 'epoch': 2} {'type': 'loss', 'content': 0.10383865237236023, 'timestamp': '2025-10-01 04:35:16.543851', 'step': 13373, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:16.597191', 'step': 13373, 'epoch': 2} {'type': 'loss', 'content': 0.2705553472042084, 'timestamp': '2025-10-01 04:35:16.599550', 'step': 13374, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:16.655361', 'step': 13374, 'epoch': 2} {'type': 'loss', 'content': 0.17358992993831635, 'timestamp': '2025-10-01 04:35:16.657400', 'step': 13375, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:16.710486', 'step': 13375, 'epoch': 2} {'type': 'loss', 'content': 0.10265383124351501, 'timestamp': '2025-10-01 04:35:16.716139', 'step': 13376, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:16.769049', 'step': 13376, 'epoch': 2} {'type': 'loss', 'content': 0.12099293619394302, 'timestamp': '2025-10-01 04:35:16.770992', 'step': 13377, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:16.824691', 'step': 13377, 'epoch': 2} {'type': 'loss', 'content': 0.08355344831943512, 'timestamp': '2025-10-01 04:35:16.827440', 'step': 13378, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:16.881246', 'step': 13378, 'epoch': 2} {'type': 'loss', 'content': 0.16280603408813477, 'timestamp': '2025-10-01 04:35:16.883630', 'step': 13379, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:16.938008', 'step': 13379, 'epoch': 2} {'type': 'loss', 'content': 0.1410597264766693, 'timestamp': '2025-10-01 04:35:16.944007', 'step': 13380, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:16.997132', 'step': 13380, 'epoch': 2} {'type': 'loss', 'content': 0.11848560720682144, 'timestamp': '2025-10-01 04:35:16.999424', 'step': 13381, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:35:17.053375', 'step': 13381, 'epoch': 2} {'type': 'loss', 'content': 0.15802742540836334, 'timestamp': '2025-10-01 04:35:17.055467', 'step': 13382, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:17.109521', 'step': 13382, 'epoch': 2} {'type': 'loss', 'content': 0.19456607103347778, 'timestamp': '2025-10-01 04:35:17.111709', 'step': 13383, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:17.165710', 'step': 13383, 'epoch': 2} {'type': 'loss', 'content': 0.11118435114622116, 'timestamp': '2025-10-01 04:35:17.171475', 'step': 13384, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:17.224935', 'step': 13384, 'epoch': 2} {'type': 'loss', 'content': 0.045889660716056824, 'timestamp': '2025-10-01 04:35:17.227183', 'step': 13385, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:17.281641', 'step': 13385, 'epoch': 2} {'type': 'loss', 'content': 0.09961560368537903, 'timestamp': '2025-10-01 04:35:17.283716', 'step': 13386, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:17.337682', 'step': 13386, 'epoch': 2} {'type': 'loss', 'content': 0.0498887300491333, 'timestamp': '2025-10-01 04:35:17.339895', 'step': 13387, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:17.393880', 'step': 13387, 'epoch': 2} {'type': 'loss', 'content': 0.058175791054964066, 'timestamp': '2025-10-01 04:35:17.399778', 'step': 13388, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:17.453241', 'step': 13388, 'epoch': 2} {'type': 'loss', 'content': 0.0805804431438446, 'timestamp': '2025-10-01 04:35:17.457527', 'step': 13389, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:17.512489', 'step': 13389, 'epoch': 2} {'type': 'loss', 'content': 0.05406767874956131, 'timestamp': '2025-10-01 04:35:17.515003', 'step': 13390, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:17.569014', 'step': 13390, 'epoch': 2} {'type': 'loss', 'content': 0.1414981633424759, 'timestamp': '2025-10-01 04:35:17.571223', 'step': 13391, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:17.630146', 'step': 13391, 'epoch': 2} {'type': 'loss', 'content': 0.13353721797466278, 'timestamp': '2025-10-01 04:35:17.635927', 'step': 13392, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:17.688486', 'step': 13392, 'epoch': 2} {'type': 'loss', 'content': 0.19526588916778564, 'timestamp': '2025-10-01 04:35:17.690618', 'step': 13393, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:17.744268', 'step': 13393, 'epoch': 2} {'type': 'loss', 'content': 0.10184013843536377, 'timestamp': '2025-10-01 04:35:17.746337', 'step': 13394, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:17.800677', 'step': 13394, 'epoch': 2} {'type': 'loss', 'content': 0.09616287052631378, 'timestamp': '2025-10-01 04:35:17.803127', 'step': 13395, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:17.857782', 'step': 13395, 'epoch': 2} {'type': 'loss', 'content': 0.09355640411376953, 'timestamp': '2025-10-01 04:35:17.867345', 'step': 13396, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:35:17.921951', 'step': 13396, 'epoch': 2} {'type': 'loss', 'content': 0.0803171768784523, 'timestamp': '2025-10-01 04:35:17.924200', 'step': 13397, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:17.978123', 'step': 13397, 'epoch': 2} {'type': 'loss', 'content': 0.12616853415966034, 'timestamp': '2025-10-01 04:35:17.981232', 'step': 13398, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:18.035391', 'step': 13398, 'epoch': 2} {'type': 'loss', 'content': 0.12659712135791779, 'timestamp': '2025-10-01 04:35:18.037530', 'step': 13399, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:18.091574', 'step': 13399, 'epoch': 2} {'type': 'loss', 'content': 0.2424939125776291, 'timestamp': '2025-10-01 04:35:18.097266', 'step': 13400, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:18.158137', 'step': 13400, 'epoch': 2} {'type': 'loss', 'content': 0.17771652340888977, 'timestamp': '2025-10-01 04:35:18.160537', 'step': 13401, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:18.213571', 'step': 13401, 'epoch': 2} {'type': 'loss', 'content': 0.12176699936389923, 'timestamp': '2025-10-01 04:35:18.222221', 'step': 13402, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:35:18.277895', 'step': 13402, 'epoch': 2} {'type': 'loss', 'content': 0.05285944417119026, 'timestamp': '2025-10-01 04:35:18.284007', 'step': 13403, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:18.337196', 'step': 13403, 'epoch': 2} {'type': 'loss', 'content': 0.09997287392616272, 'timestamp': '2025-10-01 04:35:18.342942', 'step': 13404, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:18.396504', 'step': 13404, 'epoch': 2} {'type': 'loss', 'content': 0.2005060315132141, 'timestamp': '2025-10-01 04:35:18.398886', 'step': 13405, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:18.453753', 'step': 13405, 'epoch': 2} {'type': 'loss', 'content': 0.0787525475025177, 'timestamp': '2025-10-01 04:35:18.456242', 'step': 13406, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:18.512286', 'step': 13406, 'epoch': 2} {'type': 'loss', 'content': 0.10870229452848434, 'timestamp': '2025-10-01 04:35:18.514533', 'step': 13407, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:18.572707', 'step': 13407, 'epoch': 2} {'type': 'loss', 'content': 0.16459877789020538, 'timestamp': '2025-10-01 04:35:18.578910', 'step': 13408, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:18.635335', 'step': 13408, 'epoch': 2} {'type': 'loss', 'content': 0.14003756642341614, 'timestamp': '2025-10-01 04:35:18.637980', 'step': 13409, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:18.692959', 'step': 13409, 'epoch': 2} {'type': 'loss', 'content': 0.19434888660907745, 'timestamp': '2025-10-01 04:35:18.695960', 'step': 13410, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:35:18.751164', 'step': 13410, 'epoch': 2} {'type': 'loss', 'content': 0.12523536384105682, 'timestamp': '2025-10-01 04:35:18.753523', 'step': 13411, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:18.808807', 'step': 13411, 'epoch': 2} {'type': 'loss', 'content': 0.13437171280384064, 'timestamp': '2025-10-01 04:35:18.814773', 'step': 13412, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:18.868459', 'step': 13412, 'epoch': 2} {'type': 'loss', 'content': 0.1133376806974411, 'timestamp': '2025-10-01 04:35:18.870915', 'step': 13413, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:18.924853', 'step': 13413, 'epoch': 2} {'type': 'loss', 'content': 0.09325124323368073, 'timestamp': '2025-10-01 04:35:18.930232', 'step': 13414, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:18.984481', 'step': 13414, 'epoch': 2} {'type': 'loss', 'content': 0.1036001592874527, 'timestamp': '2025-10-01 04:35:18.987038', 'step': 13415, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:19.042727', 'step': 13415, 'epoch': 2} {'type': 'loss', 'content': 0.0716748833656311, 'timestamp': '2025-10-01 04:35:19.048813', 'step': 13416, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:19.102931', 'step': 13416, 'epoch': 2} {'type': 'loss', 'content': 0.0979088842868805, 'timestamp': '2025-10-01 04:35:19.105283', 'step': 13417, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:19.159200', 'step': 13417, 'epoch': 2} {'type': 'loss', 'content': 0.12115690857172012, 'timestamp': '2025-10-01 04:35:19.161591', 'step': 13418, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:19.216885', 'step': 13418, 'epoch': 2} {'type': 'loss', 'content': 0.16352827847003937, 'timestamp': '2025-10-01 04:35:19.219456', 'step': 13419, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:19.274808', 'step': 13419, 'epoch': 2} {'type': 'loss', 'content': 0.050143152475357056, 'timestamp': '2025-10-01 04:35:19.282157', 'step': 13420, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:19.337545', 'step': 13420, 'epoch': 2} {'type': 'loss', 'content': 0.1946570873260498, 'timestamp': '2025-10-01 04:35:19.340419', 'step': 13421, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:19.395797', 'step': 13421, 'epoch': 2} {'type': 'loss', 'content': 0.11318562924861908, 'timestamp': '2025-10-01 04:35:19.397984', 'step': 13422, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:19.452201', 'step': 13422, 'epoch': 2} {'type': 'loss', 'content': 0.05245661735534668, 'timestamp': '2025-10-01 04:35:19.454878', 'step': 13423, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:19.509324', 'step': 13423, 'epoch': 2} {'type': 'loss', 'content': 0.05111969634890556, 'timestamp': '2025-10-01 04:35:19.515916', 'step': 13424, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:19.569906', 'step': 13424, 'epoch': 2} {'type': 'loss', 'content': 0.1005188450217247, 'timestamp': '2025-10-01 04:35:19.572046', 'step': 13425, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:19.626599', 'step': 13425, 'epoch': 2} {'type': 'loss', 'content': 0.08907943964004517, 'timestamp': '2025-10-01 04:35:19.628958', 'step': 13426, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:19.687708', 'step': 13426, 'epoch': 2} {'type': 'loss', 'content': 0.0845440924167633, 'timestamp': '2025-10-01 04:35:19.689954', 'step': 13427, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:35:19.745529', 'step': 13427, 'epoch': 2} {'type': 'loss', 'content': 0.13722829520702362, 'timestamp': '2025-10-01 04:35:19.751761', 'step': 13428, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:19.805179', 'step': 13428, 'epoch': 2} {'type': 'loss', 'content': 0.08137432485818863, 'timestamp': '2025-10-01 04:35:19.807384', 'step': 13429, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:19.861637', 'step': 13429, 'epoch': 2} {'type': 'loss', 'content': 0.13929933309555054, 'timestamp': '2025-10-01 04:35:19.864187', 'step': 13430, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:35:19.918547', 'step': 13430, 'epoch': 2} {'type': 'loss', 'content': 0.06419729441404343, 'timestamp': '2025-10-01 04:35:19.920921', 'step': 13431, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-01 04:35:19.976804', 'step': 13431, 'epoch': 2} {'type': 'loss', 'content': 0.0859769657254219, 'timestamp': '2025-10-01 04:35:19.986692', 'step': 13432, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:20.040976', 'step': 13432, 'epoch': 2} {'type': 'loss', 'content': 0.1283719390630722, 'timestamp': '2025-10-01 04:35:20.046099', 'step': 13433, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:20.100710', 'step': 13433, 'epoch': 2} {'type': 'loss', 'content': 0.09214354306459427, 'timestamp': '2025-10-01 04:35:20.103280', 'step': 13434, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:20.161906', 'step': 13434, 'epoch': 2} {'type': 'loss', 'content': 0.16751056909561157, 'timestamp': '2025-10-01 04:35:20.164889', 'step': 13435, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:20.218647', 'step': 13435, 'epoch': 2} {'type': 'loss', 'content': 0.12778227031230927, 'timestamp': '2025-10-01 04:35:20.224933', 'step': 13436, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:20.279234', 'step': 13436, 'epoch': 2} {'type': 'loss', 'content': 0.10830553621053696, 'timestamp': '2025-10-01 04:35:20.281541', 'step': 13437, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:20.335310', 'step': 13437, 'epoch': 2} {'type': 'loss', 'content': 0.1138141080737114, 'timestamp': '2025-10-01 04:35:20.337456', 'step': 13438, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:35:20.391738', 'step': 13438, 'epoch': 2} {'type': 'loss', 'content': 0.20031189918518066, 'timestamp': '2025-10-01 04:35:20.394048', 'step': 13439, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:35:20.447949', 'step': 13439, 'epoch': 2} {'type': 'loss', 'content': 0.11955121904611588, 'timestamp': '2025-10-01 04:35:20.453714', 'step': 13440, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:20.506234', 'step': 13440, 'epoch': 2} {'type': 'loss', 'content': 0.10204022377729416, 'timestamp': '2025-10-01 04:35:20.508275', 'step': 13441, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:20.562210', 'step': 13441, 'epoch': 2} {'type': 'loss', 'content': 0.15815426409244537, 'timestamp': '2025-10-01 04:35:20.564396', 'step': 13442, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:20.617889', 'step': 13442, 'epoch': 2} {'type': 'loss', 'content': 0.14517557621002197, 'timestamp': '2025-10-01 04:35:20.620223', 'step': 13443, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:20.673290', 'step': 13443, 'epoch': 2} {'type': 'loss', 'content': 0.1948051154613495, 'timestamp': '2025-10-01 04:35:20.679439', 'step': 13444, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:20.732814', 'step': 13444, 'epoch': 2} {'type': 'loss', 'content': 0.11169186979532242, 'timestamp': '2025-10-01 04:35:20.734967', 'step': 13445, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:20.789489', 'step': 13445, 'epoch': 2} {'type': 'loss', 'content': 0.08592919260263443, 'timestamp': '2025-10-01 04:35:20.791597', 'step': 13446, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:20.845379', 'step': 13446, 'epoch': 2} {'type': 'loss', 'content': 0.09864453226327896, 'timestamp': '2025-10-01 04:35:20.847669', 'step': 13447, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:20.901256', 'step': 13447, 'epoch': 2} {'type': 'loss', 'content': 0.14818230271339417, 'timestamp': '2025-10-01 04:35:20.906961', 'step': 13448, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:20.961084', 'step': 13448, 'epoch': 2} {'type': 'loss', 'content': 0.081344373524189, 'timestamp': '2025-10-01 04:35:20.963220', 'step': 13449, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:21.017785', 'step': 13449, 'epoch': 2} {'type': 'loss', 'content': 0.1384701430797577, 'timestamp': '2025-10-01 04:35:21.019857', 'step': 13450, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:21.073342', 'step': 13450, 'epoch': 2} {'type': 'loss', 'content': 0.08989375084638596, 'timestamp': '2025-10-01 04:35:21.075811', 'step': 13451, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:21.128964', 'step': 13451, 'epoch': 2} {'type': 'loss', 'content': 0.05952826887369156, 'timestamp': '2025-10-01 04:35:21.134706', 'step': 13452, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:21.187866', 'step': 13452, 'epoch': 2} {'type': 'loss', 'content': 0.1155497208237648, 'timestamp': '2025-10-01 04:35:21.190094', 'step': 13453, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:21.245936', 'step': 13453, 'epoch': 2} {'type': 'loss', 'content': 0.16117943823337555, 'timestamp': '2025-10-01 04:35:21.248229', 'step': 13454, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:21.301859', 'step': 13454, 'epoch': 2} {'type': 'loss', 'content': 0.2020045965909958, 'timestamp': '2025-10-01 04:35:21.304218', 'step': 13455, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:21.357431', 'step': 13455, 'epoch': 2} {'type': 'loss', 'content': 0.12038692831993103, 'timestamp': '2025-10-01 04:35:21.363138', 'step': 13456, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:21.416641', 'step': 13456, 'epoch': 2} {'type': 'loss', 'content': 0.139593243598938, 'timestamp': '2025-10-01 04:35:21.419307', 'step': 13457, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:21.473884', 'step': 13457, 'epoch': 2} {'type': 'loss', 'content': 0.14565056562423706, 'timestamp': '2025-10-01 04:35:21.476019', 'step': 13458, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:21.529442', 'step': 13458, 'epoch': 2} {'type': 'loss', 'content': 0.15863439440727234, 'timestamp': '2025-10-01 04:35:21.531684', 'step': 13459, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:21.585102', 'step': 13459, 'epoch': 2} {'type': 'loss', 'content': 0.17571817338466644, 'timestamp': '2025-10-01 04:35:21.591036', 'step': 13460, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:21.644241', 'step': 13460, 'epoch': 2} {'type': 'loss', 'content': 0.13482151925563812, 'timestamp': '2025-10-01 04:35:21.659929', 'step': 13461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:21.713988', 'step': 13461, 'epoch': 2} {'type': 'loss', 'content': 0.10806748270988464, 'timestamp': '2025-10-01 04:35:21.716158', 'step': 13462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:21.770022', 'step': 13462, 'epoch': 2} {'type': 'loss', 'content': 0.13069036602973938, 'timestamp': '2025-10-01 04:35:21.772175', 'step': 13463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:21.825862', 'step': 13463, 'epoch': 2} {'type': 'loss', 'content': 0.09819700568914413, 'timestamp': '2025-10-01 04:35:21.831696', 'step': 13464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:21.886340', 'step': 13464, 'epoch': 2} {'type': 'loss', 'content': 0.11055498570203781, 'timestamp': '2025-10-01 04:35:21.888462', 'step': 13465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:21.941561', 'step': 13465, 'epoch': 2} {'type': 'loss', 'content': 0.06352728605270386, 'timestamp': '2025-10-01 04:35:21.943801', 'step': 13466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:21.999714', 'step': 13466, 'epoch': 2} {'type': 'loss', 'content': 0.18469519913196564, 'timestamp': '2025-10-01 04:35:22.002127', 'step': 13467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:22.056112', 'step': 13467, 'epoch': 2} {'type': 'loss', 'content': 0.062224991619586945, 'timestamp': '2025-10-01 04:35:22.061750', 'step': 13468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:22.115447', 'step': 13468, 'epoch': 2} {'type': 'loss', 'content': 0.1052655428647995, 'timestamp': '2025-10-01 04:35:22.117581', 'step': 13469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:22.170813', 'step': 13469, 'epoch': 2} {'type': 'loss', 'content': 0.11350102722644806, 'timestamp': '2025-10-01 04:35:22.172886', 'step': 13470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:22.226124', 'step': 13470, 'epoch': 2} {'type': 'loss', 'content': 0.16484133899211884, 'timestamp': '2025-10-01 04:35:22.228187', 'step': 13471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:22.281302', 'step': 13471, 'epoch': 2} {'type': 'loss', 'content': 0.0770220011472702, 'timestamp': '2025-10-01 04:35:22.289596', 'step': 13472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:22.353281', 'step': 13472, 'epoch': 2} {'type': 'loss', 'content': 0.15414051711559296, 'timestamp': '2025-10-01 04:35:22.355352', 'step': 13473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:22.409113', 'step': 13473, 'epoch': 2} {'type': 'loss', 'content': 0.1517106592655182, 'timestamp': '2025-10-01 04:35:22.411371', 'step': 13474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:35:22.465583', 'step': 13474, 'epoch': 2} {'type': 'loss', 'content': 0.08029632270336151, 'timestamp': '2025-10-01 04:35:22.468888', 'step': 13475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:22.523508', 'step': 13475, 'epoch': 2} {'type': 'loss', 'content': 0.13839586079120636, 'timestamp': '2025-10-01 04:35:22.530584', 'step': 13476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:22.589228', 'step': 13476, 'epoch': 2} {'type': 'loss', 'content': 0.11245471984148026, 'timestamp': '2025-10-01 04:35:22.592775', 'step': 13477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:22.646437', 'step': 13477, 'epoch': 2} {'type': 'loss', 'content': 0.07653377950191498, 'timestamp': '2025-10-01 04:35:22.649214', 'step': 13478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:22.702540', 'step': 13478, 'epoch': 2} {'type': 'loss', 'content': 0.05482478439807892, 'timestamp': '2025-10-01 04:35:22.704719', 'step': 13479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:22.763222', 'step': 13479, 'epoch': 2} {'type': 'loss', 'content': 0.17575012147426605, 'timestamp': '2025-10-01 04:35:22.769007', 'step': 13480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:22.822215', 'step': 13480, 'epoch': 2} {'type': 'loss', 'content': 0.1534060537815094, 'timestamp': '2025-10-01 04:35:22.824347', 'step': 13481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:22.878797', 'step': 13481, 'epoch': 2} {'type': 'loss', 'content': 0.07761570066213608, 'timestamp': '2025-10-01 04:35:22.881084', 'step': 13482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:22.935130', 'step': 13482, 'epoch': 2} {'type': 'loss', 'content': 0.07427743822336197, 'timestamp': '2025-10-01 04:35:22.938201', 'step': 13483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:22.992282', 'step': 13483, 'epoch': 2} {'type': 'loss', 'content': 0.1300896257162094, 'timestamp': '2025-10-01 04:35:22.998422', 'step': 13484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:23.053321', 'step': 13484, 'epoch': 2} {'type': 'loss', 'content': 0.08988934755325317, 'timestamp': '2025-10-01 04:35:23.055345', 'step': 13485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:23.109181', 'step': 13485, 'epoch': 2} {'type': 'loss', 'content': 0.11233606934547424, 'timestamp': '2025-10-01 04:35:23.111266', 'step': 13486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:23.165547', 'step': 13486, 'epoch': 2} {'type': 'loss', 'content': 0.06766616553068161, 'timestamp': '2025-10-01 04:35:23.167779', 'step': 13487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:23.222848', 'step': 13487, 'epoch': 2} {'type': 'loss', 'content': 0.13105681538581848, 'timestamp': '2025-10-01 04:35:23.228909', 'step': 13488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:23.282272', 'step': 13488, 'epoch': 2} {'type': 'loss', 'content': 0.13602791726589203, 'timestamp': '2025-10-01 04:35:23.284405', 'step': 13489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:23.337986', 'step': 13489, 'epoch': 2} {'type': 'loss', 'content': 0.09227360785007477, 'timestamp': '2025-10-01 04:35:23.341703', 'step': 13490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:23.398910', 'step': 13490, 'epoch': 2} {'type': 'loss', 'content': 0.08234189450740814, 'timestamp': '2025-10-01 04:35:23.401149', 'step': 13491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:23.454433', 'step': 13491, 'epoch': 2} {'type': 'loss', 'content': 0.11515071988105774, 'timestamp': '2025-10-01 04:35:23.462354', 'step': 13492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:23.518887', 'step': 13492, 'epoch': 2} {'type': 'loss', 'content': 0.1114843562245369, 'timestamp': '2025-10-01 04:35:23.525534', 'step': 13493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:23.584206', 'step': 13493, 'epoch': 2} {'type': 'loss', 'content': 0.12296578288078308, 'timestamp': '2025-10-01 04:35:23.586490', 'step': 13494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:23.640644', 'step': 13494, 'epoch': 2} {'type': 'loss', 'content': 0.15828244388103485, 'timestamp': '2025-10-01 04:35:23.642903', 'step': 13495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:23.697413', 'step': 13495, 'epoch': 2} {'type': 'loss', 'content': 0.21004922688007355, 'timestamp': '2025-10-01 04:35:23.703161', 'step': 13496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:23.757466', 'step': 13496, 'epoch': 2} {'type': 'loss', 'content': 0.11341465264558792, 'timestamp': '2025-10-01 04:35:23.759790', 'step': 13497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:23.813148', 'step': 13497, 'epoch': 2} {'type': 'loss', 'content': 0.12668471038341522, 'timestamp': '2025-10-01 04:35:23.815357', 'step': 13498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:23.870199', 'step': 13498, 'epoch': 2} {'type': 'loss', 'content': 0.14523932337760925, 'timestamp': '2025-10-01 04:35:23.872796', 'step': 13499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:23.929728', 'step': 13499, 'epoch': 2} {'type': 'loss', 'content': 0.09692159295082092, 'timestamp': '2025-10-01 04:35:23.937349', 'step': 13500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 13500', 'timestamp': '2025-10-01 04:35:24.321102', 'step': 13500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:24.376685', 'step': 13500, 'epoch': 2} {'type': 'loss', 'content': 0.14649802446365356, 'timestamp': '2025-10-01 04:35:24.378775', 'step': 13501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:24.433286', 'step': 13501, 'epoch': 2} {'type': 'loss', 'content': 0.08970150351524353, 'timestamp': '2025-10-01 04:35:24.435338', 'step': 13502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:24.489656', 'step': 13502, 'epoch': 2} {'type': 'loss', 'content': 0.11900150775909424, 'timestamp': '2025-10-01 04:35:24.492275', 'step': 13503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:24.570543', 'step': 13503, 'epoch': 2} {'type': 'loss', 'content': 0.07769086956977844, 'timestamp': '2025-10-01 04:35:24.576371', 'step': 13504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:24.629582', 'step': 13504, 'epoch': 2} {'type': 'loss', 'content': 0.11868711560964584, 'timestamp': '2025-10-01 04:35:24.631768', 'step': 13505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:24.685637', 'step': 13505, 'epoch': 2} {'type': 'loss', 'content': 0.10479459166526794, 'timestamp': '2025-10-01 04:35:24.698875', 'step': 13506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:24.752916', 'step': 13506, 'epoch': 2} {'type': 'loss', 'content': 0.06315988302230835, 'timestamp': '2025-10-01 04:35:24.755064', 'step': 13507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:24.811626', 'step': 13507, 'epoch': 2} {'type': 'loss', 'content': 0.09443255513906479, 'timestamp': '2025-10-01 04:35:24.817384', 'step': 13508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:24.870698', 'step': 13508, 'epoch': 2} {'type': 'loss', 'content': 0.07872693985700607, 'timestamp': '2025-10-01 04:35:24.872993', 'step': 13509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:24.927105', 'step': 13509, 'epoch': 2} {'type': 'loss', 'content': 0.11052896827459335, 'timestamp': '2025-10-01 04:35:24.929450', 'step': 13510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:35:24.984490', 'step': 13510, 'epoch': 2} {'type': 'loss', 'content': 0.21907857060432434, 'timestamp': '2025-10-01 04:35:24.987322', 'step': 13511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:25.047141', 'step': 13511, 'epoch': 2} {'type': 'loss', 'content': 0.07706935703754425, 'timestamp': '2025-10-01 04:35:25.052827', 'step': 13512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:35:25.107108', 'step': 13512, 'epoch': 2} {'type': 'loss', 'content': 0.06497086584568024, 'timestamp': '2025-10-01 04:35:25.109284', 'step': 13513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:25.162778', 'step': 13513, 'epoch': 2} {'type': 'loss', 'content': 0.07384341955184937, 'timestamp': '2025-10-01 04:35:25.176939', 'step': 13514, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-01 04:35:38.651479', 'step': 13514, 'epoch': 2} {'type': 'pplx', 'content': 11858.935306412528, 'timestamp': '2025-10-01 04:35:38.654869', 'step': 13514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:38.709064', 'step': 13514, 'epoch': 2} {'type': 'loss', 'content': 0.14689390361309052, 'timestamp': '2025-10-01 04:35:38.711264', 'step': 13515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:38.766764', 'step': 13515, 'epoch': 2} {'type': 'loss', 'content': 0.11160816252231598, 'timestamp': '2025-10-01 04:35:38.773879', 'step': 13516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:38.827971', 'step': 13516, 'epoch': 2} {'type': 'loss', 'content': 0.09273482859134674, 'timestamp': '2025-10-01 04:35:38.830261', 'step': 13517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:38.886218', 'step': 13517, 'epoch': 2} {'type': 'loss', 'content': 0.10637770593166351, 'timestamp': '2025-10-01 04:35:38.889108', 'step': 13518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:38.945272', 'step': 13518, 'epoch': 2} {'type': 'loss', 'content': 0.2535763680934906, 'timestamp': '2025-10-01 04:35:38.948036', 'step': 13519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:39.002244', 'step': 13519, 'epoch': 2} {'type': 'loss', 'content': 0.12259207665920258, 'timestamp': '2025-10-01 04:35:39.008635', 'step': 13520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:39.063142', 'step': 13520, 'epoch': 2} {'type': 'loss', 'content': 0.1815364807844162, 'timestamp': '2025-10-01 04:35:39.066084', 'step': 13521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:39.120438', 'step': 13521, 'epoch': 2} {'type': 'loss', 'content': 0.17624467611312866, 'timestamp': '2025-10-01 04:35:39.122715', 'step': 13522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:39.177691', 'step': 13522, 'epoch': 2} {'type': 'loss', 'content': 0.049021340906620026, 'timestamp': '2025-10-01 04:35:39.180256', 'step': 13523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:39.248347', 'step': 13523, 'epoch': 2} {'type': 'loss', 'content': 0.06928662955760956, 'timestamp': '2025-10-01 04:35:39.254415', 'step': 13524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:39.308852', 'step': 13524, 'epoch': 2} {'type': 'loss', 'content': 0.15740962326526642, 'timestamp': '2025-10-01 04:35:39.311429', 'step': 13525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:39.365904', 'step': 13525, 'epoch': 2} {'type': 'loss', 'content': 0.057917848229408264, 'timestamp': '2025-10-01 04:35:39.368153', 'step': 13526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:39.422776', 'step': 13526, 'epoch': 2} {'type': 'loss', 'content': 0.1440490186214447, 'timestamp': '2025-10-01 04:35:39.425349', 'step': 13527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:39.479071', 'step': 13527, 'epoch': 2} {'type': 'loss', 'content': 0.10519153624773026, 'timestamp': '2025-10-01 04:35:39.484894', 'step': 13528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:35:39.538321', 'step': 13528, 'epoch': 2} {'type': 'loss', 'content': 0.11907927691936493, 'timestamp': '2025-10-01 04:35:39.540614', 'step': 13529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:39.594179', 'step': 13529, 'epoch': 2} {'type': 'loss', 'content': 0.09176594018936157, 'timestamp': '2025-10-01 04:35:39.596755', 'step': 13530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:39.650124', 'step': 13530, 'epoch': 2} {'type': 'loss', 'content': 0.16079728305339813, 'timestamp': '2025-10-01 04:35:39.652228', 'step': 13531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:39.713584', 'step': 13531, 'epoch': 2} {'type': 'loss', 'content': 0.08425060659646988, 'timestamp': '2025-10-01 04:35:39.719578', 'step': 13532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:39.775731', 'step': 13532, 'epoch': 2} {'type': 'loss', 'content': 0.0949251800775528, 'timestamp': '2025-10-01 04:35:39.777931', 'step': 13533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:39.832180', 'step': 13533, 'epoch': 2} {'type': 'loss', 'content': 0.08938401937484741, 'timestamp': '2025-10-01 04:35:39.834341', 'step': 13534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:39.888579', 'step': 13534, 'epoch': 2} {'type': 'loss', 'content': 0.12435156106948853, 'timestamp': '2025-10-01 04:35:39.890686', 'step': 13535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:35:39.944808', 'step': 13535, 'epoch': 2} {'type': 'loss', 'content': 0.13124780356884003, 'timestamp': '2025-10-01 04:35:39.951400', 'step': 13536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:40.005043', 'step': 13536, 'epoch': 2} {'type': 'loss', 'content': 0.060285743325948715, 'timestamp': '2025-10-01 04:35:40.007628', 'step': 13537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:40.060965', 'step': 13537, 'epoch': 2} {'type': 'loss', 'content': 0.08178672939538956, 'timestamp': '2025-10-01 04:35:40.063143', 'step': 13538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:40.119407', 'step': 13538, 'epoch': 2} {'type': 'loss', 'content': 0.1252809762954712, 'timestamp': '2025-10-01 04:35:40.133052', 'step': 13539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:40.192928', 'step': 13539, 'epoch': 2} {'type': 'loss', 'content': 0.13064678013324738, 'timestamp': '2025-10-01 04:35:40.199705', 'step': 13540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:40.254869', 'step': 13540, 'epoch': 2} {'type': 'loss', 'content': 0.19942493736743927, 'timestamp': '2025-10-01 04:35:40.258066', 'step': 13541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:40.311450', 'step': 13541, 'epoch': 2} {'type': 'loss', 'content': 0.06462399661540985, 'timestamp': '2025-10-01 04:35:40.313567', 'step': 13542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:40.367345', 'step': 13542, 'epoch': 2} {'type': 'loss', 'content': 0.10989825427532196, 'timestamp': '2025-10-01 04:35:40.372483', 'step': 13543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:40.437460', 'step': 13543, 'epoch': 2} {'type': 'loss', 'content': 0.10292810201644897, 'timestamp': '2025-10-01 04:35:40.443678', 'step': 13544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:40.496996', 'step': 13544, 'epoch': 2} {'type': 'loss', 'content': 0.15867747366428375, 'timestamp': '2025-10-01 04:35:40.499286', 'step': 13545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:40.553266', 'step': 13545, 'epoch': 2} {'type': 'loss', 'content': 0.1789356917142868, 'timestamp': '2025-10-01 04:35:40.556472', 'step': 13546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:40.619733', 'step': 13546, 'epoch': 2} {'type': 'loss', 'content': 0.23607885837554932, 'timestamp': '2025-10-01 04:35:40.622332', 'step': 13547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:40.677211', 'step': 13547, 'epoch': 2} {'type': 'loss', 'content': 0.15754994750022888, 'timestamp': '2025-10-01 04:35:40.683626', 'step': 13548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:40.739418', 'step': 13548, 'epoch': 2} {'type': 'loss', 'content': 0.05852208286523819, 'timestamp': '2025-10-01 04:35:40.741968', 'step': 13549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:40.796963', 'step': 13549, 'epoch': 2} {'type': 'loss', 'content': 0.11886576563119888, 'timestamp': '2025-10-01 04:35:40.799067', 'step': 13550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:40.857175', 'step': 13550, 'epoch': 2} {'type': 'loss', 'content': 0.13202513754367828, 'timestamp': '2025-10-01 04:35:40.859380', 'step': 13551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:40.912933', 'step': 13551, 'epoch': 2} {'type': 'loss', 'content': 0.1410217136144638, 'timestamp': '2025-10-01 04:35:40.919019', 'step': 13552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:40.971890', 'step': 13552, 'epoch': 2} {'type': 'loss', 'content': 0.11144129931926727, 'timestamp': '2025-10-01 04:35:40.973985', 'step': 13553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:41.027645', 'step': 13553, 'epoch': 2} {'type': 'loss', 'content': 0.1323840618133545, 'timestamp': '2025-10-01 04:35:41.029938', 'step': 13554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:41.083936', 'step': 13554, 'epoch': 2} {'type': 'loss', 'content': 0.17882508039474487, 'timestamp': '2025-10-01 04:35:41.086132', 'step': 13555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:41.139299', 'step': 13555, 'epoch': 2} {'type': 'loss', 'content': 0.1918591558933258, 'timestamp': '2025-10-01 04:35:41.155182', 'step': 13556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:41.207929', 'step': 13556, 'epoch': 2} {'type': 'loss', 'content': 0.11919242888689041, 'timestamp': '2025-10-01 04:35:41.210300', 'step': 13557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:41.263414', 'step': 13557, 'epoch': 2} {'type': 'loss', 'content': 0.11148890107870102, 'timestamp': '2025-10-01 04:35:41.265442', 'step': 13558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:41.318538', 'step': 13558, 'epoch': 2} {'type': 'loss', 'content': 0.12471199035644531, 'timestamp': '2025-10-01 04:35:41.320794', 'step': 13559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:41.373987', 'step': 13559, 'epoch': 2} {'type': 'loss', 'content': 0.07598766684532166, 'timestamp': '2025-10-01 04:35:41.380022', 'step': 13560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:41.432845', 'step': 13560, 'epoch': 2} {'type': 'loss', 'content': 0.10018717497587204, 'timestamp': '2025-10-01 04:35:41.435269', 'step': 13561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:41.490067', 'step': 13561, 'epoch': 2} {'type': 'loss', 'content': 0.0739874541759491, 'timestamp': '2025-10-01 04:35:41.492301', 'step': 13562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:41.550063', 'step': 13562, 'epoch': 2} {'type': 'loss', 'content': 0.11931756138801575, 'timestamp': '2025-10-01 04:35:41.552263', 'step': 13563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:35:41.608791', 'step': 13563, 'epoch': 2} {'type': 'loss', 'content': 0.1054900586605072, 'timestamp': '2025-10-01 04:35:41.615091', 'step': 13564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:41.668724', 'step': 13564, 'epoch': 2} {'type': 'loss', 'content': 0.12601850926876068, 'timestamp': '2025-10-01 04:35:41.670792', 'step': 13565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:41.726300', 'step': 13565, 'epoch': 2} {'type': 'loss', 'content': 0.1107216402888298, 'timestamp': '2025-10-01 04:35:41.728523', 'step': 13566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:41.784961', 'step': 13566, 'epoch': 2} {'type': 'loss', 'content': 0.18951648473739624, 'timestamp': '2025-10-01 04:35:41.787160', 'step': 13567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:41.844377', 'step': 13567, 'epoch': 2} {'type': 'loss', 'content': 0.1330816149711609, 'timestamp': '2025-10-01 04:35:41.851249', 'step': 13568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:41.910071', 'step': 13568, 'epoch': 2} {'type': 'loss', 'content': 0.16205020248889923, 'timestamp': '2025-10-01 04:35:41.912140', 'step': 13569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:41.969947', 'step': 13569, 'epoch': 2} {'type': 'loss', 'content': 0.10109458863735199, 'timestamp': '2025-10-01 04:35:41.978576', 'step': 13570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:42.036594', 'step': 13570, 'epoch': 2} {'type': 'loss', 'content': 0.1493850201368332, 'timestamp': '2025-10-01 04:35:42.038848', 'step': 13571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:42.097522', 'step': 13571, 'epoch': 2} {'type': 'loss', 'content': 0.13821108639240265, 'timestamp': '2025-10-01 04:35:42.105178', 'step': 13572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:42.162260', 'step': 13572, 'epoch': 2} {'type': 'loss', 'content': 0.11353956162929535, 'timestamp': '2025-10-01 04:35:42.164534', 'step': 13573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:42.221441', 'step': 13573, 'epoch': 2} {'type': 'loss', 'content': 0.13088874518871307, 'timestamp': '2025-10-01 04:35:42.223533', 'step': 13574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:42.281208', 'step': 13574, 'epoch': 2} {'type': 'loss', 'content': 0.07478073984384537, 'timestamp': '2025-10-01 04:35:42.283380', 'step': 13575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:42.339236', 'step': 13575, 'epoch': 2} {'type': 'loss', 'content': 0.14970488846302032, 'timestamp': '2025-10-01 04:35:42.345732', 'step': 13576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:42.409430', 'step': 13576, 'epoch': 2} {'type': 'loss', 'content': 0.1303802877664566, 'timestamp': '2025-10-01 04:35:42.411696', 'step': 13577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:35:42.471228', 'step': 13577, 'epoch': 2} {'type': 'loss', 'content': 0.1003836914896965, 'timestamp': '2025-10-01 04:35:42.473517', 'step': 13578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:42.533826', 'step': 13578, 'epoch': 2} {'type': 'loss', 'content': 0.07962588965892792, 'timestamp': '2025-10-01 04:35:42.536198', 'step': 13579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:42.595052', 'step': 13579, 'epoch': 2} {'type': 'loss', 'content': 0.17520396411418915, 'timestamp': '2025-10-01 04:35:42.601697', 'step': 13580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:42.656518', 'step': 13580, 'epoch': 2} {'type': 'loss', 'content': 0.10683264583349228, 'timestamp': '2025-10-01 04:35:42.659086', 'step': 13581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:42.713482', 'step': 13581, 'epoch': 2} {'type': 'loss', 'content': 0.13900141417980194, 'timestamp': '2025-10-01 04:35:42.716038', 'step': 13582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:42.770777', 'step': 13582, 'epoch': 2} {'type': 'loss', 'content': 0.1597633957862854, 'timestamp': '2025-10-01 04:35:42.773444', 'step': 13583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:42.844614', 'step': 13583, 'epoch': 2} {'type': 'loss', 'content': 0.08142032474279404, 'timestamp': '2025-10-01 04:35:42.850883', 'step': 13584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:42.904904', 'step': 13584, 'epoch': 2} {'type': 'loss', 'content': 0.16764013469219208, 'timestamp': '2025-10-01 04:35:42.907045', 'step': 13585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:42.960816', 'step': 13585, 'epoch': 2} {'type': 'loss', 'content': 0.1489454209804535, 'timestamp': '2025-10-01 04:35:42.962830', 'step': 13586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:43.017298', 'step': 13586, 'epoch': 2} {'type': 'loss', 'content': 0.08247071504592896, 'timestamp': '2025-10-01 04:35:43.019438', 'step': 13587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:35:43.073762', 'step': 13587, 'epoch': 2} {'type': 'loss', 'content': 0.14382882416248322, 'timestamp': '2025-10-01 04:35:43.080299', 'step': 13588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:43.135071', 'step': 13588, 'epoch': 2} {'type': 'loss', 'content': 0.09702647477388382, 'timestamp': '2025-10-01 04:35:43.137689', 'step': 13589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:43.192921', 'step': 13589, 'epoch': 2} {'type': 'loss', 'content': 0.09994682669639587, 'timestamp': '2025-10-01 04:35:43.195226', 'step': 13590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:43.249497', 'step': 13590, 'epoch': 2} {'type': 'loss', 'content': 0.08514989912509918, 'timestamp': '2025-10-01 04:35:43.251852', 'step': 13591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:43.307322', 'step': 13591, 'epoch': 2} {'type': 'loss', 'content': 0.23011936247348785, 'timestamp': '2025-10-01 04:35:43.313837', 'step': 13592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:43.378478', 'step': 13592, 'epoch': 2} {'type': 'loss', 'content': 0.12258046120405197, 'timestamp': '2025-10-01 04:35:43.380895', 'step': 13593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:43.446930', 'step': 13593, 'epoch': 2} {'type': 'loss', 'content': 0.07062000781297684, 'timestamp': '2025-10-01 04:35:43.449859', 'step': 13594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:43.506719', 'step': 13594, 'epoch': 2} {'type': 'loss', 'content': 0.16094906628131866, 'timestamp': '2025-10-01 04:35:43.509186', 'step': 13595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:43.565459', 'step': 13595, 'epoch': 2} {'type': 'loss', 'content': 0.07508090883493423, 'timestamp': '2025-10-01 04:35:43.571932', 'step': 13596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:43.628353', 'step': 13596, 'epoch': 2} {'type': 'loss', 'content': 0.11702091991901398, 'timestamp': '2025-10-01 04:35:43.630672', 'step': 13597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:43.687650', 'step': 13597, 'epoch': 2} {'type': 'loss', 'content': 0.1954726278781891, 'timestamp': '2025-10-01 04:35:43.689682', 'step': 13598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:43.747989', 'step': 13598, 'epoch': 2} {'type': 'loss', 'content': 0.18397016823291779, 'timestamp': '2025-10-01 04:35:43.750309', 'step': 13599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:43.808786', 'step': 13599, 'epoch': 2} {'type': 'loss', 'content': 0.09524809569120407, 'timestamp': '2025-10-01 04:35:43.825432', 'step': 13600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:43.888756', 'step': 13600, 'epoch': 2} {'type': 'loss', 'content': 0.07160904258489609, 'timestamp': '2025-10-01 04:35:43.891299', 'step': 13601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:43.949064', 'step': 13601, 'epoch': 2} {'type': 'loss', 'content': 0.19748902320861816, 'timestamp': '2025-10-01 04:35:43.951340', 'step': 13602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:44.008875', 'step': 13602, 'epoch': 2} {'type': 'loss', 'content': 0.12314010411500931, 'timestamp': '2025-10-01 04:35:44.011173', 'step': 13603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:44.067494', 'step': 13603, 'epoch': 2} {'type': 'loss', 'content': 0.07631021738052368, 'timestamp': '2025-10-01 04:35:44.073897', 'step': 13604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:44.127504', 'step': 13604, 'epoch': 2} {'type': 'loss', 'content': 0.17327585816383362, 'timestamp': '2025-10-01 04:35:44.130787', 'step': 13605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:44.185184', 'step': 13605, 'epoch': 2} {'type': 'loss', 'content': 0.09241184592247009, 'timestamp': '2025-10-01 04:35:44.187541', 'step': 13606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:44.241736', 'step': 13606, 'epoch': 2} {'type': 'loss', 'content': 0.13296347856521606, 'timestamp': '2025-10-01 04:35:44.244098', 'step': 13607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:44.299642', 'step': 13607, 'epoch': 2} {'type': 'loss', 'content': 0.093715600669384, 'timestamp': '2025-10-01 04:35:44.306527', 'step': 13608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:44.361166', 'step': 13608, 'epoch': 2} {'type': 'loss', 'content': 0.1313614398241043, 'timestamp': '2025-10-01 04:35:44.363377', 'step': 13609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:44.417368', 'step': 13609, 'epoch': 2} {'type': 'loss', 'content': 0.13524441421031952, 'timestamp': '2025-10-01 04:35:44.419574', 'step': 13610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:44.474181', 'step': 13610, 'epoch': 2} {'type': 'loss', 'content': 0.13066446781158447, 'timestamp': '2025-10-01 04:35:44.476453', 'step': 13611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:44.529535', 'step': 13611, 'epoch': 2} {'type': 'loss', 'content': 0.10530976206064224, 'timestamp': '2025-10-01 04:35:44.538639', 'step': 13612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:44.592089', 'step': 13612, 'epoch': 2} {'type': 'loss', 'content': 0.22862283885478973, 'timestamp': '2025-10-01 04:35:44.594341', 'step': 13613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:44.658001', 'step': 13613, 'epoch': 2} {'type': 'loss', 'content': 0.0897994115948677, 'timestamp': '2025-10-01 04:35:44.659917', 'step': 13614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:44.713023', 'step': 13614, 'epoch': 2} {'type': 'loss', 'content': 0.1573873907327652, 'timestamp': '2025-10-01 04:35:44.715181', 'step': 13615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:44.769368', 'step': 13615, 'epoch': 2} {'type': 'loss', 'content': 0.21737486124038696, 'timestamp': '2025-10-01 04:35:44.775319', 'step': 13616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:35:44.828714', 'step': 13616, 'epoch': 2} {'type': 'loss', 'content': 0.13127991557121277, 'timestamp': '2025-10-01 04:35:44.830805', 'step': 13617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:44.884232', 'step': 13617, 'epoch': 2} {'type': 'loss', 'content': 0.2412251979112625, 'timestamp': '2025-10-01 04:35:44.886505', 'step': 13618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:44.942390', 'step': 13618, 'epoch': 2} {'type': 'loss', 'content': 0.1258177012205124, 'timestamp': '2025-10-01 04:35:44.952796', 'step': 13619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:45.007282', 'step': 13619, 'epoch': 2} {'type': 'loss', 'content': 0.09105583280324936, 'timestamp': '2025-10-01 04:35:45.013676', 'step': 13620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:45.066578', 'step': 13620, 'epoch': 2} {'type': 'loss', 'content': 0.06977993249893188, 'timestamp': '2025-10-01 04:35:45.068773', 'step': 13621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:45.128384', 'step': 13621, 'epoch': 2} {'type': 'loss', 'content': 0.12157004326581955, 'timestamp': '2025-10-01 04:35:45.131112', 'step': 13622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:45.184613', 'step': 13622, 'epoch': 2} {'type': 'loss', 'content': 0.07715948671102524, 'timestamp': '2025-10-01 04:35:45.186881', 'step': 13623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:45.241194', 'step': 13623, 'epoch': 2} {'type': 'loss', 'content': 0.12226679921150208, 'timestamp': '2025-10-01 04:35:45.247482', 'step': 13624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:45.300718', 'step': 13624, 'epoch': 2} {'type': 'loss', 'content': 0.09346514940261841, 'timestamp': '2025-10-01 04:35:45.302781', 'step': 13625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:45.356359', 'step': 13625, 'epoch': 2} {'type': 'loss', 'content': 0.07263189554214478, 'timestamp': '2025-10-01 04:35:45.358620', 'step': 13626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:45.412628', 'step': 13626, 'epoch': 2} {'type': 'loss', 'content': 0.13766370713710785, 'timestamp': '2025-10-01 04:35:45.415629', 'step': 13627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:45.469658', 'step': 13627, 'epoch': 2} {'type': 'loss', 'content': 0.103712297976017, 'timestamp': '2025-10-01 04:35:45.475749', 'step': 13628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:45.528919', 'step': 13628, 'epoch': 2} {'type': 'loss', 'content': 0.13214309513568878, 'timestamp': '2025-10-01 04:35:45.532494', 'step': 13629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:45.590888', 'step': 13629, 'epoch': 2} {'type': 'loss', 'content': 0.08689127117395401, 'timestamp': '2025-10-01 04:35:45.594299', 'step': 13630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:45.648655', 'step': 13630, 'epoch': 2} {'type': 'loss', 'content': 0.13516318798065186, 'timestamp': '2025-10-01 04:35:45.650952', 'step': 13631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:45.704275', 'step': 13631, 'epoch': 2} {'type': 'loss', 'content': 0.09485439956188202, 'timestamp': '2025-10-01 04:35:45.710114', 'step': 13632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:45.762787', 'step': 13632, 'epoch': 2} {'type': 'loss', 'content': 0.07283692806959152, 'timestamp': '2025-10-01 04:35:45.765137', 'step': 13633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:45.818358', 'step': 13633, 'epoch': 2} {'type': 'loss', 'content': 0.174414724111557, 'timestamp': '2025-10-01 04:35:45.827007', 'step': 13634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:45.881268', 'step': 13634, 'epoch': 2} {'type': 'loss', 'content': 0.10117358714342117, 'timestamp': '2025-10-01 04:35:45.883909', 'step': 13635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:45.937925', 'step': 13635, 'epoch': 2} {'type': 'loss', 'content': 0.10609351843595505, 'timestamp': '2025-10-01 04:35:45.944293', 'step': 13636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:45.998179', 'step': 13636, 'epoch': 2} {'type': 'loss', 'content': 0.13765062391757965, 'timestamp': '2025-10-01 04:35:46.001092', 'step': 13637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:46.055706', 'step': 13637, 'epoch': 2} {'type': 'loss', 'content': 0.1793927550315857, 'timestamp': '2025-10-01 04:35:46.058448', 'step': 13638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:46.116426', 'step': 13638, 'epoch': 2} {'type': 'loss', 'content': 0.06990577280521393, 'timestamp': '2025-10-01 04:35:46.119081', 'step': 13639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:46.172795', 'step': 13639, 'epoch': 2} {'type': 'loss', 'content': 0.13559146225452423, 'timestamp': '2025-10-01 04:35:46.178707', 'step': 13640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:46.231862', 'step': 13640, 'epoch': 2} {'type': 'loss', 'content': 0.10134421288967133, 'timestamp': '2025-10-01 04:35:46.233927', 'step': 13641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:46.287561', 'step': 13641, 'epoch': 2} {'type': 'loss', 'content': 0.1033327728509903, 'timestamp': '2025-10-01 04:35:46.289764', 'step': 13642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:46.343919', 'step': 13642, 'epoch': 2} {'type': 'loss', 'content': 0.15457335114479065, 'timestamp': '2025-10-01 04:35:46.346145', 'step': 13643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:46.400267', 'step': 13643, 'epoch': 2} {'type': 'loss', 'content': 0.14973081648349762, 'timestamp': '2025-10-01 04:35:46.406185', 'step': 13644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:46.459191', 'step': 13644, 'epoch': 2} {'type': 'loss', 'content': 0.09866312146186829, 'timestamp': '2025-10-01 04:35:46.461703', 'step': 13645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:35:46.516959', 'step': 13645, 'epoch': 2} {'type': 'loss', 'content': 0.10170682519674301, 'timestamp': '2025-10-01 04:35:46.519400', 'step': 13646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:46.573276', 'step': 13646, 'epoch': 2} {'type': 'loss', 'content': 0.14996451139450073, 'timestamp': '2025-10-01 04:35:46.575500', 'step': 13647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:46.629478', 'step': 13647, 'epoch': 2} {'type': 'loss', 'content': 0.08391930162906647, 'timestamp': '2025-10-01 04:35:46.635617', 'step': 13648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:46.688731', 'step': 13648, 'epoch': 2} {'type': 'loss', 'content': 0.05030537396669388, 'timestamp': '2025-10-01 04:35:46.691218', 'step': 13649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:46.745091', 'step': 13649, 'epoch': 2} {'type': 'loss', 'content': 0.15415643155574799, 'timestamp': '2025-10-01 04:35:46.751071', 'step': 13650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:46.808740', 'step': 13650, 'epoch': 2} {'type': 'loss', 'content': 0.0717414990067482, 'timestamp': '2025-10-01 04:35:46.812910', 'step': 13651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:46.882391', 'step': 13651, 'epoch': 2} {'type': 'loss', 'content': 0.21007975935935974, 'timestamp': '2025-10-01 04:35:46.888473', 'step': 13652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:46.941664', 'step': 13652, 'epoch': 2} {'type': 'loss', 'content': 0.15125110745429993, 'timestamp': '2025-10-01 04:35:46.944778', 'step': 13653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:47.000856', 'step': 13653, 'epoch': 2} {'type': 'loss', 'content': 0.17632994055747986, 'timestamp': '2025-10-01 04:35:47.003558', 'step': 13654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:35:47.072074', 'step': 13654, 'epoch': 2} {'type': 'loss', 'content': 0.10911726206541061, 'timestamp': '2025-10-01 04:35:47.078367', 'step': 13655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:47.137038', 'step': 13655, 'epoch': 2} {'type': 'loss', 'content': 0.0769498348236084, 'timestamp': '2025-10-01 04:35:47.143228', 'step': 13656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:47.197501', 'step': 13656, 'epoch': 2} {'type': 'loss', 'content': 0.14906619489192963, 'timestamp': '2025-10-01 04:35:47.199781', 'step': 13657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:35:47.255106', 'step': 13657, 'epoch': 2} {'type': 'loss', 'content': 0.04861938953399658, 'timestamp': '2025-10-01 04:35:47.257394', 'step': 13658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:47.319532', 'step': 13658, 'epoch': 2} {'type': 'loss', 'content': 0.10464957356452942, 'timestamp': '2025-10-01 04:35:47.322521', 'step': 13659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:47.376192', 'step': 13659, 'epoch': 2} {'type': 'loss', 'content': 0.0857941061258316, 'timestamp': '2025-10-01 04:35:47.382131', 'step': 13660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:47.435902', 'step': 13660, 'epoch': 2} {'type': 'loss', 'content': 0.20079097151756287, 'timestamp': '2025-10-01 04:35:47.439548', 'step': 13661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:47.492642', 'step': 13661, 'epoch': 2} {'type': 'loss', 'content': 0.10856615751981735, 'timestamp': '2025-10-01 04:35:47.498871', 'step': 13662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:47.560603', 'step': 13662, 'epoch': 2} {'type': 'loss', 'content': 0.10687913745641708, 'timestamp': '2025-10-01 04:35:47.565932', 'step': 13663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:47.620742', 'step': 13663, 'epoch': 2} {'type': 'loss', 'content': 0.15616731345653534, 'timestamp': '2025-10-01 04:35:47.626445', 'step': 13664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:47.678994', 'step': 13664, 'epoch': 2} {'type': 'loss', 'content': 0.1016475185751915, 'timestamp': '2025-10-01 04:35:47.681066', 'step': 13665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:47.733919', 'step': 13665, 'epoch': 2} {'type': 'loss', 'content': 0.21504053473472595, 'timestamp': '2025-10-01 04:35:47.736153', 'step': 13666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:47.792373', 'step': 13666, 'epoch': 2} {'type': 'loss', 'content': 0.10014122724533081, 'timestamp': '2025-10-01 04:35:47.796867', 'step': 13667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:35:47.852657', 'step': 13667, 'epoch': 2} {'type': 'loss', 'content': 0.11697567254304886, 'timestamp': '2025-10-01 04:35:47.858501', 'step': 13668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:47.910574', 'step': 13668, 'epoch': 2} {'type': 'loss', 'content': 0.14306317269802094, 'timestamp': '2025-10-01 04:35:47.912861', 'step': 13669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:47.965736', 'step': 13669, 'epoch': 2} {'type': 'loss', 'content': 0.15413756668567657, 'timestamp': '2025-10-01 04:35:47.967860', 'step': 13670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:48.020937', 'step': 13670, 'epoch': 2} {'type': 'loss', 'content': 0.1604451984167099, 'timestamp': '2025-10-01 04:35:48.022734', 'step': 13671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:48.075375', 'step': 13671, 'epoch': 2} {'type': 'loss', 'content': 0.14843621850013733, 'timestamp': '2025-10-01 04:35:48.081084', 'step': 13672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:48.133540', 'step': 13672, 'epoch': 2} {'type': 'loss', 'content': 0.14185036718845367, 'timestamp': '2025-10-01 04:35:48.135643', 'step': 13673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:48.189737', 'step': 13673, 'epoch': 2} {'type': 'loss', 'content': 0.10821881145238876, 'timestamp': '2025-10-01 04:35:48.191858', 'step': 13674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:48.245080', 'step': 13674, 'epoch': 2} {'type': 'loss', 'content': 0.05212115868926048, 'timestamp': '2025-10-01 04:35:48.247311', 'step': 13675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:48.300444', 'step': 13675, 'epoch': 2} {'type': 'loss', 'content': 0.1555652618408203, 'timestamp': '2025-10-01 04:35:48.306324', 'step': 13676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:48.365837', 'step': 13676, 'epoch': 2} {'type': 'loss', 'content': 0.1054345965385437, 'timestamp': '2025-10-01 04:35:48.367984', 'step': 13677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:48.420091', 'step': 13677, 'epoch': 2} {'type': 'loss', 'content': 0.21116746962070465, 'timestamp': '2025-10-01 04:35:48.423262', 'step': 13678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:35:48.476330', 'step': 13678, 'epoch': 2} {'type': 'loss', 'content': 0.24135451018810272, 'timestamp': '2025-10-01 04:35:48.480053', 'step': 13679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:35:48.535111', 'step': 13679, 'epoch': 2} {'type': 'loss', 'content': 0.12235833704471588, 'timestamp': '2025-10-01 04:35:48.541426', 'step': 13680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:48.595812', 'step': 13680, 'epoch': 2} {'type': 'loss', 'content': 0.09182301163673401, 'timestamp': '2025-10-01 04:35:48.597974', 'step': 13681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:35:48.651323', 'step': 13681, 'epoch': 2} {'type': 'loss', 'content': 0.11660022288560867, 'timestamp': '2025-10-01 04:35:48.654363', 'step': 13682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:48.708552', 'step': 13682, 'epoch': 2} {'type': 'loss', 'content': 0.08063221722841263, 'timestamp': '2025-10-01 04:35:48.714245', 'step': 13683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:48.770005', 'step': 13683, 'epoch': 2} {'type': 'loss', 'content': 0.09905368834733963, 'timestamp': '2025-10-01 04:35:48.775678', 'step': 13684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:48.828059', 'step': 13684, 'epoch': 2} {'type': 'loss', 'content': 0.056443504989147186, 'timestamp': '2025-10-01 04:35:48.831724', 'step': 13685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:48.890113', 'step': 13685, 'epoch': 2} {'type': 'loss', 'content': 0.10884761810302734, 'timestamp': '2025-10-01 04:35:48.892301', 'step': 13686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:48.945502', 'step': 13686, 'epoch': 2} {'type': 'loss', 'content': 0.14861375093460083, 'timestamp': '2025-10-01 04:35:48.947623', 'step': 13687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:49.000807', 'step': 13687, 'epoch': 2} {'type': 'loss', 'content': 0.2258531153202057, 'timestamp': '2025-10-01 04:35:49.006678', 'step': 13688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:49.059339', 'step': 13688, 'epoch': 2} {'type': 'loss', 'content': 0.09147022664546967, 'timestamp': '2025-10-01 04:35:49.061803', 'step': 13689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:49.114753', 'step': 13689, 'epoch': 2} {'type': 'loss', 'content': 0.12226942181587219, 'timestamp': '2025-10-01 04:35:49.117232', 'step': 13690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:49.170369', 'step': 13690, 'epoch': 2} {'type': 'loss', 'content': 0.08278337866067886, 'timestamp': '2025-10-01 04:35:49.172660', 'step': 13691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:49.225552', 'step': 13691, 'epoch': 2} {'type': 'loss', 'content': 0.20233182609081268, 'timestamp': '2025-10-01 04:35:49.231255', 'step': 13692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:49.283697', 'step': 13692, 'epoch': 2} {'type': 'loss', 'content': 0.09393779188394547, 'timestamp': '2025-10-01 04:35:49.285821', 'step': 13693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:49.338898', 'step': 13693, 'epoch': 2} {'type': 'loss', 'content': 0.1220037043094635, 'timestamp': '2025-10-01 04:35:49.341083', 'step': 13694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:49.394567', 'step': 13694, 'epoch': 2} {'type': 'loss', 'content': 0.14036279916763306, 'timestamp': '2025-10-01 04:35:49.396827', 'step': 13695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:49.455320', 'step': 13695, 'epoch': 2} {'type': 'loss', 'content': 0.09741579741239548, 'timestamp': '2025-10-01 04:35:49.461066', 'step': 13696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:49.514055', 'step': 13696, 'epoch': 2} {'type': 'loss', 'content': 0.09936100989580154, 'timestamp': '2025-10-01 04:35:49.516162', 'step': 13697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:49.568871', 'step': 13697, 'epoch': 2} {'type': 'loss', 'content': 0.06674017757177353, 'timestamp': '2025-10-01 04:35:49.571182', 'step': 13698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:49.625002', 'step': 13698, 'epoch': 2} {'type': 'loss', 'content': 0.1750153750181198, 'timestamp': '2025-10-01 04:35:49.627380', 'step': 13699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:49.680455', 'step': 13699, 'epoch': 2} {'type': 'loss', 'content': 0.20410408079624176, 'timestamp': '2025-10-01 04:35:49.686142', 'step': 13700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:49.738607', 'step': 13700, 'epoch': 2} {'type': 'loss', 'content': 0.11503042280673981, 'timestamp': '2025-10-01 04:35:49.740689', 'step': 13701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:49.793852', 'step': 13701, 'epoch': 2} {'type': 'loss', 'content': 0.21136924624443054, 'timestamp': '2025-10-01 04:35:49.795799', 'step': 13702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:49.848422', 'step': 13702, 'epoch': 2} {'type': 'loss', 'content': 0.11069133877754211, 'timestamp': '2025-10-01 04:35:49.850555', 'step': 13703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:49.903471', 'step': 13703, 'epoch': 2} {'type': 'loss', 'content': 0.11995552480220795, 'timestamp': '2025-10-01 04:35:49.909386', 'step': 13704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:49.962822', 'step': 13704, 'epoch': 2} {'type': 'loss', 'content': 0.033851101994514465, 'timestamp': '2025-10-01 04:35:49.965311', 'step': 13705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:50.019483', 'step': 13705, 'epoch': 2} {'type': 'loss', 'content': 0.08409149199724197, 'timestamp': '2025-10-01 04:35:50.022919', 'step': 13706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:50.083036', 'step': 13706, 'epoch': 2} {'type': 'loss', 'content': 0.13947176933288574, 'timestamp': '2025-10-01 04:35:50.084831', 'step': 13707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:50.144557', 'step': 13707, 'epoch': 2} {'type': 'loss', 'content': 0.07498928904533386, 'timestamp': '2025-10-01 04:35:50.149779', 'step': 13708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:50.202036', 'step': 13708, 'epoch': 2} {'type': 'loss', 'content': 0.1051718071103096, 'timestamp': '2025-10-01 04:35:50.204121', 'step': 13709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:50.258250', 'step': 13709, 'epoch': 2} {'type': 'loss', 'content': 0.1912955343723297, 'timestamp': '2025-10-01 04:35:50.260295', 'step': 13710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:50.315160', 'step': 13710, 'epoch': 2} {'type': 'loss', 'content': 0.10909593105316162, 'timestamp': '2025-10-01 04:35:50.317357', 'step': 13711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:50.370686', 'step': 13711, 'epoch': 2} {'type': 'loss', 'content': 0.08893287926912308, 'timestamp': '2025-10-01 04:35:50.376687', 'step': 13712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:50.429969', 'step': 13712, 'epoch': 2} {'type': 'loss', 'content': 0.06525711715221405, 'timestamp': '2025-10-01 04:35:50.432093', 'step': 13713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:50.485280', 'step': 13713, 'epoch': 2} {'type': 'loss', 'content': 0.2425399124622345, 'timestamp': '2025-10-01 04:35:50.487279', 'step': 13714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:50.540457', 'step': 13714, 'epoch': 2} {'type': 'loss', 'content': 0.1942613422870636, 'timestamp': '2025-10-01 04:35:50.542245', 'step': 13715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:35:50.595316', 'step': 13715, 'epoch': 2} {'type': 'loss', 'content': 0.10182270407676697, 'timestamp': '2025-10-01 04:35:50.601885', 'step': 13716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:50.653988', 'step': 13716, 'epoch': 2} {'type': 'loss', 'content': 0.09663542360067368, 'timestamp': '2025-10-01 04:35:50.656995', 'step': 13717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:50.710066', 'step': 13717, 'epoch': 2} {'type': 'loss', 'content': 0.11708401888608932, 'timestamp': '2025-10-01 04:35:50.712306', 'step': 13718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:35:50.766050', 'step': 13718, 'epoch': 2} {'type': 'loss', 'content': 0.16875901818275452, 'timestamp': '2025-10-01 04:35:50.768806', 'step': 13719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:50.831186', 'step': 13719, 'epoch': 2} {'type': 'loss', 'content': 0.11089145392179489, 'timestamp': '2025-10-01 04:35:50.837107', 'step': 13720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:50.890145', 'step': 13720, 'epoch': 2} {'type': 'loss', 'content': 0.04734214022755623, 'timestamp': '2025-10-01 04:35:50.891676', 'step': 13721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:50.945295', 'step': 13721, 'epoch': 2} {'type': 'loss', 'content': 0.05319410562515259, 'timestamp': '2025-10-01 04:35:50.948895', 'step': 13722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:51.002655', 'step': 13722, 'epoch': 2} {'type': 'loss', 'content': 0.10879085958003998, 'timestamp': '2025-10-01 04:35:51.004623', 'step': 13723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:51.058162', 'step': 13723, 'epoch': 2} {'type': 'loss', 'content': 0.14712195098400116, 'timestamp': '2025-10-01 04:35:51.063902', 'step': 13724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:51.122725', 'step': 13724, 'epoch': 2} {'type': 'loss', 'content': 0.1001184731721878, 'timestamp': '2025-10-01 04:35:51.125142', 'step': 13725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:51.180267', 'step': 13725, 'epoch': 2} {'type': 'loss', 'content': 0.1704450100660324, 'timestamp': '2025-10-01 04:35:51.183513', 'step': 13726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:51.237418', 'step': 13726, 'epoch': 2} {'type': 'loss', 'content': 0.12543539702892303, 'timestamp': '2025-10-01 04:35:51.239533', 'step': 13727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:51.301120', 'step': 13727, 'epoch': 2} {'type': 'loss', 'content': 0.13020004332065582, 'timestamp': '2025-10-01 04:35:51.306855', 'step': 13728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:51.368209', 'step': 13728, 'epoch': 2} {'type': 'loss', 'content': 0.11932792514562607, 'timestamp': '2025-10-01 04:35:51.369913', 'step': 13729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:51.426170', 'step': 13729, 'epoch': 2} {'type': 'loss', 'content': 0.1408580094575882, 'timestamp': '2025-10-01 04:35:51.428301', 'step': 13730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:51.482204', 'step': 13730, 'epoch': 2} {'type': 'loss', 'content': 0.14681024849414825, 'timestamp': '2025-10-01 04:35:51.484084', 'step': 13731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:51.537083', 'step': 13731, 'epoch': 2} {'type': 'loss', 'content': 0.09624692797660828, 'timestamp': '2025-10-01 04:35:51.543421', 'step': 13732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:51.606402', 'step': 13732, 'epoch': 2} {'type': 'loss', 'content': 0.08082202821969986, 'timestamp': '2025-10-01 04:35:51.608647', 'step': 13733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:51.663221', 'step': 13733, 'epoch': 2} {'type': 'loss', 'content': 0.14397217333316803, 'timestamp': '2025-10-01 04:35:51.665565', 'step': 13734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:51.718733', 'step': 13734, 'epoch': 2} {'type': 'loss', 'content': 0.10946804285049438, 'timestamp': '2025-10-01 04:35:51.726413', 'step': 13735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:51.779440', 'step': 13735, 'epoch': 2} {'type': 'loss', 'content': 0.22870078682899475, 'timestamp': '2025-10-01 04:35:51.784805', 'step': 13736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:51.837007', 'step': 13736, 'epoch': 2} {'type': 'loss', 'content': 0.0661296397447586, 'timestamp': '2025-10-01 04:35:51.844165', 'step': 13737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:51.899264', 'step': 13737, 'epoch': 2} {'type': 'loss', 'content': 0.18229125440120697, 'timestamp': '2025-10-01 04:35:51.901955', 'step': 13738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:51.955917', 'step': 13738, 'epoch': 2} {'type': 'loss', 'content': 0.10417089611291885, 'timestamp': '2025-10-01 04:35:51.974141', 'step': 13739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:35:52.027886', 'step': 13739, 'epoch': 2} {'type': 'loss', 'content': 0.10398130863904953, 'timestamp': '2025-10-01 04:35:52.037086', 'step': 13740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:52.090318', 'step': 13740, 'epoch': 2} {'type': 'loss', 'content': 0.13578806817531586, 'timestamp': '2025-10-01 04:35:52.092324', 'step': 13741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:52.147074', 'step': 13741, 'epoch': 2} {'type': 'loss', 'content': 0.06620772927999496, 'timestamp': '2025-10-01 04:35:52.148689', 'step': 13742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:52.203520', 'step': 13742, 'epoch': 2} {'type': 'loss', 'content': 0.22665703296661377, 'timestamp': '2025-10-01 04:35:52.205242', 'step': 13743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:52.260077', 'step': 13743, 'epoch': 2} {'type': 'loss', 'content': 0.1240328773856163, 'timestamp': '2025-10-01 04:35:52.266199', 'step': 13744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:52.323686', 'step': 13744, 'epoch': 2} {'type': 'loss', 'content': 0.13319425284862518, 'timestamp': '2025-10-01 04:35:52.325942', 'step': 13745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:52.380640', 'step': 13745, 'epoch': 2} {'type': 'loss', 'content': 0.13731569051742554, 'timestamp': '2025-10-01 04:35:52.382779', 'step': 13746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:52.436136', 'step': 13746, 'epoch': 2} {'type': 'loss', 'content': 0.12193409353494644, 'timestamp': '2025-10-01 04:35:52.438573', 'step': 13747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:52.501148', 'step': 13747, 'epoch': 2} {'type': 'loss', 'content': 0.1677403450012207, 'timestamp': '2025-10-01 04:35:52.506592', 'step': 13748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:52.560353', 'step': 13748, 'epoch': 2} {'type': 'loss', 'content': 0.14137642085552216, 'timestamp': '2025-10-01 04:35:52.565225', 'step': 13749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:52.623901', 'step': 13749, 'epoch': 2} {'type': 'loss', 'content': 0.12556873261928558, 'timestamp': '2025-10-01 04:35:52.625539', 'step': 13750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:52.678746', 'step': 13750, 'epoch': 2} {'type': 'loss', 'content': 0.18619440495967865, 'timestamp': '2025-10-01 04:35:52.680528', 'step': 13751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:52.738544', 'step': 13751, 'epoch': 2} {'type': 'loss', 'content': 0.19316913187503815, 'timestamp': '2025-10-01 04:35:52.745422', 'step': 13752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:35:52.798588', 'step': 13752, 'epoch': 2} {'type': 'loss', 'content': 0.08226918429136276, 'timestamp': '2025-10-01 04:35:52.802947', 'step': 13753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:52.861534', 'step': 13753, 'epoch': 2} {'type': 'loss', 'content': 0.26678627729415894, 'timestamp': '2025-10-01 04:35:52.867876', 'step': 13754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:52.921596', 'step': 13754, 'epoch': 2} {'type': 'loss', 'content': 0.17804884910583496, 'timestamp': '2025-10-01 04:35:52.923691', 'step': 13755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:52.981060', 'step': 13755, 'epoch': 2} {'type': 'loss', 'content': 0.07229015976190567, 'timestamp': '2025-10-01 04:35:52.986326', 'step': 13756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:53.042770', 'step': 13756, 'epoch': 2} {'type': 'loss', 'content': 0.09240525960922241, 'timestamp': '2025-10-01 04:35:53.044555', 'step': 13757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:53.097008', 'step': 13757, 'epoch': 2} {'type': 'loss', 'content': 0.14506711065769196, 'timestamp': '2025-10-01 04:35:53.099129', 'step': 13758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:53.153060', 'step': 13758, 'epoch': 2} {'type': 'loss', 'content': 0.10329916328191757, 'timestamp': '2025-10-01 04:35:53.155448', 'step': 13759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:53.208490', 'step': 13759, 'epoch': 2} {'type': 'loss', 'content': 0.2095208764076233, 'timestamp': '2025-10-01 04:35:53.214245', 'step': 13760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:53.268741', 'step': 13760, 'epoch': 2} {'type': 'loss', 'content': 0.14942309260368347, 'timestamp': '2025-10-01 04:35:53.271197', 'step': 13761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:53.329748', 'step': 13761, 'epoch': 2} {'type': 'loss', 'content': 0.07739579677581787, 'timestamp': '2025-10-01 04:35:53.334119', 'step': 13762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:53.397630', 'step': 13762, 'epoch': 2} {'type': 'loss', 'content': 0.0787787064909935, 'timestamp': '2025-10-01 04:35:53.399671', 'step': 13763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:53.454212', 'step': 13763, 'epoch': 2} {'type': 'loss', 'content': 0.10631049424409866, 'timestamp': '2025-10-01 04:35:53.473648', 'step': 13764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:35:53.545295', 'step': 13764, 'epoch': 2} {'type': 'loss', 'content': 0.10333231836557388, 'timestamp': '2025-10-01 04:35:53.548201', 'step': 13765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:53.603266', 'step': 13765, 'epoch': 2} {'type': 'loss', 'content': 0.10298620909452438, 'timestamp': '2025-10-01 04:35:53.618535', 'step': 13766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:53.677413', 'step': 13766, 'epoch': 2} {'type': 'loss', 'content': 0.19164925813674927, 'timestamp': '2025-10-01 04:35:53.679592', 'step': 13767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:35:53.732584', 'step': 13767, 'epoch': 2} {'type': 'loss', 'content': 0.05228002741932869, 'timestamp': '2025-10-01 04:35:53.738009', 'step': 13768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:53.790557', 'step': 13768, 'epoch': 2} {'type': 'loss', 'content': 0.06626889109611511, 'timestamp': '2025-10-01 04:35:53.792262', 'step': 13769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:53.844976', 'step': 13769, 'epoch': 2} {'type': 'loss', 'content': 0.11242874711751938, 'timestamp': '2025-10-01 04:35:53.846941', 'step': 13770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:53.900471', 'step': 13770, 'epoch': 2} {'type': 'loss', 'content': 0.11845020949840546, 'timestamp': '2025-10-01 04:35:53.902712', 'step': 13771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:53.956743', 'step': 13771, 'epoch': 2} {'type': 'loss', 'content': 0.047568462789058685, 'timestamp': '2025-10-01 04:35:53.963252', 'step': 13772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:54.017102', 'step': 13772, 'epoch': 2} {'type': 'loss', 'content': 0.11896408349275589, 'timestamp': '2025-10-01 04:35:54.019330', 'step': 13773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:54.073037', 'step': 13773, 'epoch': 2} {'type': 'loss', 'content': 0.10798995196819305, 'timestamp': '2025-10-01 04:35:54.075165', 'step': 13774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:54.128236', 'step': 13774, 'epoch': 2} {'type': 'loss', 'content': 0.21795547008514404, 'timestamp': '2025-10-01 04:35:54.130892', 'step': 13775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:54.188519', 'step': 13775, 'epoch': 2} {'type': 'loss', 'content': 0.17503273487091064, 'timestamp': '2025-10-01 04:35:54.194377', 'step': 13776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:54.252062', 'step': 13776, 'epoch': 2} {'type': 'loss', 'content': 0.19581928849220276, 'timestamp': '2025-10-01 04:35:54.254483', 'step': 13777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:54.308232', 'step': 13777, 'epoch': 2} {'type': 'loss', 'content': 0.2287149280309677, 'timestamp': '2025-10-01 04:35:54.310845', 'step': 13778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:54.364037', 'step': 13778, 'epoch': 2} {'type': 'loss', 'content': 0.22516478598117828, 'timestamp': '2025-10-01 04:35:54.366557', 'step': 13779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:54.421116', 'step': 13779, 'epoch': 2} {'type': 'loss', 'content': 0.13413630425930023, 'timestamp': '2025-10-01 04:35:54.426786', 'step': 13780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:54.485401', 'step': 13780, 'epoch': 2} {'type': 'loss', 'content': 0.19103950262069702, 'timestamp': '2025-10-01 04:35:54.487674', 'step': 13781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:54.541252', 'step': 13781, 'epoch': 2} {'type': 'loss', 'content': 0.24618026614189148, 'timestamp': '2025-10-01 04:35:54.543476', 'step': 13782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:54.596793', 'step': 13782, 'epoch': 2} {'type': 'loss', 'content': 0.10664647072553635, 'timestamp': '2025-10-01 04:35:54.610025', 'step': 13783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:54.664996', 'step': 13783, 'epoch': 2} {'type': 'loss', 'content': 0.09402281790971756, 'timestamp': '2025-10-01 04:35:54.670845', 'step': 13784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:54.723741', 'step': 13784, 'epoch': 2} {'type': 'loss', 'content': 0.13559657335281372, 'timestamp': '2025-10-01 04:35:54.725849', 'step': 13785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:54.779287', 'step': 13785, 'epoch': 2} {'type': 'loss', 'content': 0.08374541252851486, 'timestamp': '2025-10-01 04:35:54.781360', 'step': 13786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:54.834505', 'step': 13786, 'epoch': 2} {'type': 'loss', 'content': 0.059343256056308746, 'timestamp': '2025-10-01 04:35:54.841054', 'step': 13787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:54.897920', 'step': 13787, 'epoch': 2} {'type': 'loss', 'content': 0.08839233964681625, 'timestamp': '2025-10-01 04:35:54.904030', 'step': 13788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:54.957246', 'step': 13788, 'epoch': 2} {'type': 'loss', 'content': 0.10029923170804977, 'timestamp': '2025-10-01 04:35:54.959590', 'step': 13789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:55.012964', 'step': 13789, 'epoch': 2} {'type': 'loss', 'content': 0.09456892311573029, 'timestamp': '2025-10-01 04:35:55.015433', 'step': 13790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:55.080144', 'step': 13790, 'epoch': 2} {'type': 'loss', 'content': 0.13532105088233948, 'timestamp': '2025-10-01 04:35:55.082572', 'step': 13791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:55.136706', 'step': 13791, 'epoch': 2} {'type': 'loss', 'content': 0.09052422642707825, 'timestamp': '2025-10-01 04:35:55.142603', 'step': 13792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:35:55.196407', 'step': 13792, 'epoch': 2} {'type': 'loss', 'content': 0.07150419056415558, 'timestamp': '2025-10-01 04:35:55.198570', 'step': 13793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:55.252106', 'step': 13793, 'epoch': 2} {'type': 'loss', 'content': 0.13922491669654846, 'timestamp': '2025-10-01 04:35:55.254891', 'step': 13794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:55.309853', 'step': 13794, 'epoch': 2} {'type': 'loss', 'content': 0.1357535421848297, 'timestamp': '2025-10-01 04:35:55.312103', 'step': 13795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:55.365896', 'step': 13795, 'epoch': 2} {'type': 'loss', 'content': 0.17308463156223297, 'timestamp': '2025-10-01 04:35:55.371682', 'step': 13796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:55.436237', 'step': 13796, 'epoch': 2} {'type': 'loss', 'content': 0.11901059746742249, 'timestamp': '2025-10-01 04:35:55.438598', 'step': 13797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:55.492721', 'step': 13797, 'epoch': 2} {'type': 'loss', 'content': 0.11411470919847488, 'timestamp': '2025-10-01 04:35:55.507856', 'step': 13798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:55.561709', 'step': 13798, 'epoch': 2} {'type': 'loss', 'content': 0.11676883697509766, 'timestamp': '2025-10-01 04:35:55.564248', 'step': 13799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:55.617731', 'step': 13799, 'epoch': 2} {'type': 'loss', 'content': 0.11838265508413315, 'timestamp': '2025-10-01 04:35:55.623816', 'step': 13800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:55.676500', 'step': 13800, 'epoch': 2} {'type': 'loss', 'content': 0.1251186728477478, 'timestamp': '2025-10-01 04:35:55.679005', 'step': 13801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:55.742929', 'step': 13801, 'epoch': 2} {'type': 'loss', 'content': 0.13476721942424774, 'timestamp': '2025-10-01 04:35:55.745265', 'step': 13802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:55.799296', 'step': 13802, 'epoch': 2} {'type': 'loss', 'content': 0.17929673194885254, 'timestamp': '2025-10-01 04:35:55.812789', 'step': 13803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:55.869874', 'step': 13803, 'epoch': 2} {'type': 'loss', 'content': 0.06943757086992264, 'timestamp': '2025-10-01 04:35:55.886494', 'step': 13804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:55.939566', 'step': 13804, 'epoch': 2} {'type': 'loss', 'content': 0.17837533354759216, 'timestamp': '2025-10-01 04:35:55.942037', 'step': 13805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:55.995831', 'step': 13805, 'epoch': 2} {'type': 'loss', 'content': 0.09312010556459427, 'timestamp': '2025-10-01 04:35:55.998355', 'step': 13806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:56.052686', 'step': 13806, 'epoch': 2} {'type': 'loss', 'content': 0.10652024298906326, 'timestamp': '2025-10-01 04:35:56.055331', 'step': 13807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:56.110485', 'step': 13807, 'epoch': 2} {'type': 'loss', 'content': 0.09677784889936447, 'timestamp': '2025-10-01 04:35:56.116111', 'step': 13808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:56.171594', 'step': 13808, 'epoch': 2} {'type': 'loss', 'content': 0.09386774897575378, 'timestamp': '2025-10-01 04:35:56.173989', 'step': 13809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:56.228570', 'step': 13809, 'epoch': 2} {'type': 'loss', 'content': 0.12326434254646301, 'timestamp': '2025-10-01 04:35:56.231038', 'step': 13810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:56.286364', 'step': 13810, 'epoch': 2} {'type': 'loss', 'content': 0.07954524457454681, 'timestamp': '2025-10-01 04:35:56.288881', 'step': 13811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:35:56.342649', 'step': 13811, 'epoch': 2} {'type': 'loss', 'content': 0.09884937107563019, 'timestamp': '2025-10-01 04:35:56.348699', 'step': 13812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:56.403289', 'step': 13812, 'epoch': 2} {'type': 'loss', 'content': 0.09928283095359802, 'timestamp': '2025-10-01 04:35:56.405493', 'step': 13813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:56.459445', 'step': 13813, 'epoch': 2} {'type': 'loss', 'content': 0.12024527043104172, 'timestamp': '2025-10-01 04:35:56.462101', 'step': 13814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:56.523351', 'step': 13814, 'epoch': 2} {'type': 'loss', 'content': 0.13667336106300354, 'timestamp': '2025-10-01 04:35:56.525784', 'step': 13815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:56.581633', 'step': 13815, 'epoch': 2} {'type': 'loss', 'content': 0.12321019172668457, 'timestamp': '2025-10-01 04:35:56.587420', 'step': 13816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:35:56.647253', 'step': 13816, 'epoch': 2} {'type': 'loss', 'content': 0.10054793953895569, 'timestamp': '2025-10-01 04:35:56.661535', 'step': 13817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:56.714950', 'step': 13817, 'epoch': 2} {'type': 'loss', 'content': 0.14966878294944763, 'timestamp': '2025-10-01 04:35:56.717047', 'step': 13818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:56.770317', 'step': 13818, 'epoch': 2} {'type': 'loss', 'content': 0.13509368896484375, 'timestamp': '2025-10-01 04:35:56.772587', 'step': 13819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:56.826282', 'step': 13819, 'epoch': 2} {'type': 'loss', 'content': 0.21630196273326874, 'timestamp': '2025-10-01 04:35:56.832164', 'step': 13820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:56.884585', 'step': 13820, 'epoch': 2} {'type': 'loss', 'content': 0.07321443408727646, 'timestamp': '2025-10-01 04:35:56.886769', 'step': 13821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:56.940542', 'step': 13821, 'epoch': 2} {'type': 'loss', 'content': 0.11221970617771149, 'timestamp': '2025-10-01 04:35:56.942720', 'step': 13822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:56.999939', 'step': 13822, 'epoch': 2} {'type': 'loss', 'content': 0.11387402564287186, 'timestamp': '2025-10-01 04:35:57.002059', 'step': 13823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:57.055256', 'step': 13823, 'epoch': 2} {'type': 'loss', 'content': 0.2032458335161209, 'timestamp': '2025-10-01 04:35:57.061460', 'step': 13824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:57.114833', 'step': 13824, 'epoch': 2} {'type': 'loss', 'content': 0.10015060752630234, 'timestamp': '2025-10-01 04:35:57.116784', 'step': 13825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:57.171932', 'step': 13825, 'epoch': 2} {'type': 'loss', 'content': 0.08405333757400513, 'timestamp': '2025-10-01 04:35:57.174159', 'step': 13826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:57.227936', 'step': 13826, 'epoch': 2} {'type': 'loss', 'content': 0.08623824268579483, 'timestamp': '2025-10-01 04:35:57.229917', 'step': 13827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:57.283695', 'step': 13827, 'epoch': 2} {'type': 'loss', 'content': 0.10889788717031479, 'timestamp': '2025-10-01 04:35:57.289872', 'step': 13828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:35:57.342598', 'step': 13828, 'epoch': 2} {'type': 'loss', 'content': 0.1295863837003708, 'timestamp': '2025-10-01 04:35:57.344726', 'step': 13829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:57.398164', 'step': 13829, 'epoch': 2} {'type': 'loss', 'content': 0.1685272604227066, 'timestamp': '2025-10-01 04:35:57.400254', 'step': 13830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:57.454026', 'step': 13830, 'epoch': 2} {'type': 'loss', 'content': 0.11513932049274445, 'timestamp': '2025-10-01 04:35:57.455670', 'step': 13831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:57.508322', 'step': 13831, 'epoch': 2} {'type': 'loss', 'content': 0.07008277624845505, 'timestamp': '2025-10-01 04:35:57.515009', 'step': 13832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:57.567529', 'step': 13832, 'epoch': 2} {'type': 'loss', 'content': 0.10643020272254944, 'timestamp': '2025-10-01 04:35:57.569754', 'step': 13833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:57.623048', 'step': 13833, 'epoch': 2} {'type': 'loss', 'content': 0.06576795130968094, 'timestamp': '2025-10-01 04:35:57.625353', 'step': 13834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:57.679423', 'step': 13834, 'epoch': 2} {'type': 'loss', 'content': 0.10446050763130188, 'timestamp': '2025-10-01 04:35:57.681127', 'step': 13835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:57.733634', 'step': 13835, 'epoch': 2} {'type': 'loss', 'content': 0.1424129754304886, 'timestamp': '2025-10-01 04:35:57.739465', 'step': 13836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:57.792281', 'step': 13836, 'epoch': 2} {'type': 'loss', 'content': 0.11914834380149841, 'timestamp': '2025-10-01 04:35:57.794523', 'step': 13837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:57.847436', 'step': 13837, 'epoch': 2} {'type': 'loss', 'content': 0.0814921110868454, 'timestamp': '2025-10-01 04:35:57.849675', 'step': 13838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:35:57.902984', 'step': 13838, 'epoch': 2} {'type': 'loss', 'content': 0.11415137350559235, 'timestamp': '2025-10-01 04:35:57.905137', 'step': 13839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:57.958973', 'step': 13839, 'epoch': 2} {'type': 'loss', 'content': 0.17711575329303741, 'timestamp': '2025-10-01 04:35:57.965356', 'step': 13840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:58.025156', 'step': 13840, 'epoch': 2} {'type': 'loss', 'content': 0.0821700394153595, 'timestamp': '2025-10-01 04:35:58.029209', 'step': 13841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:35:58.083337', 'step': 13841, 'epoch': 2} {'type': 'loss', 'content': 0.11426705867052078, 'timestamp': '2025-10-01 04:35:58.085411', 'step': 13842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:58.138822', 'step': 13842, 'epoch': 2} {'type': 'loss', 'content': 0.07816239446401596, 'timestamp': '2025-10-01 04:35:58.141415', 'step': 13843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:58.194896', 'step': 13843, 'epoch': 2} {'type': 'loss', 'content': 0.09032168984413147, 'timestamp': '2025-10-01 04:35:58.200927', 'step': 13844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:58.254689', 'step': 13844, 'epoch': 2} {'type': 'loss', 'content': 0.14593398571014404, 'timestamp': '2025-10-01 04:35:58.262812', 'step': 13845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:58.315598', 'step': 13845, 'epoch': 2} {'type': 'loss', 'content': 0.17665517330169678, 'timestamp': '2025-10-01 04:35:58.318397', 'step': 13846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:58.372970', 'step': 13846, 'epoch': 2} {'type': 'loss', 'content': 0.20755891501903534, 'timestamp': '2025-10-01 04:35:58.375096', 'step': 13847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:58.429406', 'step': 13847, 'epoch': 2} {'type': 'loss', 'content': 0.19248360395431519, 'timestamp': '2025-10-01 04:35:58.436513', 'step': 13848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:58.495000', 'step': 13848, 'epoch': 2} {'type': 'loss', 'content': 0.12529318034648895, 'timestamp': '2025-10-01 04:35:58.497900', 'step': 13849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:58.559413', 'step': 13849, 'epoch': 2} {'type': 'loss', 'content': 0.10555750131607056, 'timestamp': '2025-10-01 04:35:58.562318', 'step': 13850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:58.636735', 'step': 13850, 'epoch': 2} {'type': 'loss', 'content': 0.10296224802732468, 'timestamp': '2025-10-01 04:35:58.640616', 'step': 13851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:58.710349', 'step': 13851, 'epoch': 2} {'type': 'loss', 'content': 0.2011903077363968, 'timestamp': '2025-10-01 04:35:58.717878', 'step': 13852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:58.776556', 'step': 13852, 'epoch': 2} {'type': 'loss', 'content': 0.09745541214942932, 'timestamp': '2025-10-01 04:35:58.779190', 'step': 13853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:58.838167', 'step': 13853, 'epoch': 2} {'type': 'loss', 'content': 0.14786715805530548, 'timestamp': '2025-10-01 04:35:58.840295', 'step': 13854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:58.898860', 'step': 13854, 'epoch': 2} {'type': 'loss', 'content': 0.10749839246273041, 'timestamp': '2025-10-01 04:35:58.901244', 'step': 13855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:35:58.961576', 'step': 13855, 'epoch': 2} {'type': 'loss', 'content': 0.16397489607334137, 'timestamp': '2025-10-01 04:35:58.968895', 'step': 13856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:59.037397', 'step': 13856, 'epoch': 2} {'type': 'loss', 'content': 0.19479474425315857, 'timestamp': '2025-10-01 04:35:59.040061', 'step': 13857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:59.099093', 'step': 13857, 'epoch': 2} {'type': 'loss', 'content': 0.14110784232616425, 'timestamp': '2025-10-01 04:35:59.101539', 'step': 13858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:59.160999', 'step': 13858, 'epoch': 2} {'type': 'loss', 'content': 0.1539076864719391, 'timestamp': '2025-10-01 04:35:59.163364', 'step': 13859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:59.221765', 'step': 13859, 'epoch': 2} {'type': 'loss', 'content': 0.09210024774074554, 'timestamp': '2025-10-01 04:35:59.228679', 'step': 13860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:59.292230', 'step': 13860, 'epoch': 2} {'type': 'loss', 'content': 0.15711629390716553, 'timestamp': '2025-10-01 04:35:59.294289', 'step': 13861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:59.347278', 'step': 13861, 'epoch': 2} {'type': 'loss', 'content': 0.14430944621562958, 'timestamp': '2025-10-01 04:35:59.349793', 'step': 13862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:59.404313', 'step': 13862, 'epoch': 2} {'type': 'loss', 'content': 0.16203823685646057, 'timestamp': '2025-10-01 04:35:59.406794', 'step': 13863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:59.462417', 'step': 13863, 'epoch': 2} {'type': 'loss', 'content': 0.13840369880199432, 'timestamp': '2025-10-01 04:35:59.468676', 'step': 13864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:59.524219', 'step': 13864, 'epoch': 2} {'type': 'loss', 'content': 0.2208525389432907, 'timestamp': '2025-10-01 04:35:59.526493', 'step': 13865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:59.594724', 'step': 13865, 'epoch': 2} {'type': 'loss', 'content': 0.07298993319272995, 'timestamp': '2025-10-01 04:35:59.596898', 'step': 13866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:35:59.657233', 'step': 13866, 'epoch': 2} {'type': 'loss', 'content': 0.15607531368732452, 'timestamp': '2025-10-01 04:35:59.660608', 'step': 13867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:35:59.713456', 'step': 13867, 'epoch': 2} {'type': 'loss', 'content': 0.07432491332292557, 'timestamp': '2025-10-01 04:35:59.719551', 'step': 13868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:59.772242', 'step': 13868, 'epoch': 2} {'type': 'loss', 'content': 0.08672734349966049, 'timestamp': '2025-10-01 04:35:59.774756', 'step': 13869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:59.829197', 'step': 13869, 'epoch': 2} {'type': 'loss', 'content': 0.09424096345901489, 'timestamp': '2025-10-01 04:35:59.831343', 'step': 13870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:35:59.884848', 'step': 13870, 'epoch': 2} {'type': 'loss', 'content': 0.06107364222407341, 'timestamp': '2025-10-01 04:35:59.887323', 'step': 13871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:35:59.941214', 'step': 13871, 'epoch': 2} {'type': 'loss', 'content': 0.08351606875658035, 'timestamp': '2025-10-01 04:35:59.947386', 'step': 13872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:36:00.000532', 'step': 13872, 'epoch': 2} {'type': 'loss', 'content': 0.08514930307865143, 'timestamp': '2025-10-01 04:36:00.002896', 'step': 13873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:36:00.056304', 'step': 13873, 'epoch': 2} {'type': 'loss', 'content': 0.19615334272384644, 'timestamp': '2025-10-01 04:36:00.058644', 'step': 13874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:00.112231', 'step': 13874, 'epoch': 2} {'type': 'loss', 'content': 0.1070222482085228, 'timestamp': '2025-10-01 04:36:00.114424', 'step': 13875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:00.168125', 'step': 13875, 'epoch': 2} {'type': 'loss', 'content': 0.14944152534008026, 'timestamp': '2025-10-01 04:36:00.180687', 'step': 13876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:00.235864', 'step': 13876, 'epoch': 2} {'type': 'loss', 'content': 0.060817915946245193, 'timestamp': '2025-10-01 04:36:00.238256', 'step': 13877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:00.293059', 'step': 13877, 'epoch': 2} {'type': 'loss', 'content': 0.06625467538833618, 'timestamp': '2025-10-01 04:36:00.295496', 'step': 13878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:00.349693', 'step': 13878, 'epoch': 2} {'type': 'loss', 'content': 0.14376862347126007, 'timestamp': '2025-10-01 04:36:00.353672', 'step': 13879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:00.406791', 'step': 13879, 'epoch': 2} {'type': 'loss', 'content': 0.11095032095909119, 'timestamp': '2025-10-01 04:36:00.412771', 'step': 13880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:00.465532', 'step': 13880, 'epoch': 2} {'type': 'loss', 'content': 0.0902811661362648, 'timestamp': '2025-10-01 04:36:00.467799', 'step': 13881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:00.521129', 'step': 13881, 'epoch': 2} {'type': 'loss', 'content': 0.1928931176662445, 'timestamp': '2025-10-01 04:36:00.524090', 'step': 13882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:00.580819', 'step': 13882, 'epoch': 2} {'type': 'loss', 'content': 0.0876472070813179, 'timestamp': '2025-10-01 04:36:00.593023', 'step': 13883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:36:00.646250', 'step': 13883, 'epoch': 2} {'type': 'loss', 'content': 0.10935579985380173, 'timestamp': '2025-10-01 04:36:00.652132', 'step': 13884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:00.705336', 'step': 13884, 'epoch': 2} {'type': 'loss', 'content': 0.058112602680921555, 'timestamp': '2025-10-01 04:36:00.707870', 'step': 13885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:00.761543', 'step': 13885, 'epoch': 2} {'type': 'loss', 'content': 0.09015708416700363, 'timestamp': '2025-10-01 04:36:00.763734', 'step': 13886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:00.822730', 'step': 13886, 'epoch': 2} {'type': 'loss', 'content': 0.08648651838302612, 'timestamp': '2025-10-01 04:36:00.824960', 'step': 13887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:36:00.883173', 'step': 13887, 'epoch': 2} {'type': 'loss', 'content': 0.16904805600643158, 'timestamp': '2025-10-01 04:36:00.892627', 'step': 13888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:00.945174', 'step': 13888, 'epoch': 2} {'type': 'loss', 'content': 0.082015760242939, 'timestamp': '2025-10-01 04:36:00.947360', 'step': 13889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:36:01.016373', 'step': 13889, 'epoch': 2} {'type': 'loss', 'content': 0.14577466249465942, 'timestamp': '2025-10-01 04:36:01.018835', 'step': 13890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:01.083295', 'step': 13890, 'epoch': 2} {'type': 'loss', 'content': 0.09557714313268661, 'timestamp': '2025-10-01 04:36:01.086122', 'step': 13891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:01.146172', 'step': 13891, 'epoch': 2} {'type': 'loss', 'content': 0.15109996497631073, 'timestamp': '2025-10-01 04:36:01.152530', 'step': 13892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:01.210176', 'step': 13892, 'epoch': 2} {'type': 'loss', 'content': 0.08668055385351181, 'timestamp': '2025-10-01 04:36:01.212578', 'step': 13893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:01.266522', 'step': 13893, 'epoch': 2} {'type': 'loss', 'content': 0.06414633244276047, 'timestamp': '2025-10-01 04:36:01.268770', 'step': 13894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:01.337301', 'step': 13894, 'epoch': 2} {'type': 'loss', 'content': 0.1588922142982483, 'timestamp': '2025-10-01 04:36:01.339569', 'step': 13895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:01.400016', 'step': 13895, 'epoch': 2} {'type': 'loss', 'content': 0.15252138674259186, 'timestamp': '2025-10-01 04:36:01.405857', 'step': 13896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:01.459985', 'step': 13896, 'epoch': 2} {'type': 'loss', 'content': 0.1444934904575348, 'timestamp': '2025-10-01 04:36:01.473205', 'step': 13897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:01.526556', 'step': 13897, 'epoch': 2} {'type': 'loss', 'content': 0.12626907229423523, 'timestamp': '2025-10-01 04:36:01.531105', 'step': 13898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:01.585528', 'step': 13898, 'epoch': 2} {'type': 'loss', 'content': 0.12391942739486694, 'timestamp': '2025-10-01 04:36:01.587806', 'step': 13899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:01.640769', 'step': 13899, 'epoch': 2} {'type': 'loss', 'content': 0.07100982964038849, 'timestamp': '2025-10-01 04:36:01.647642', 'step': 13900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:01.701292', 'step': 13900, 'epoch': 2} {'type': 'loss', 'content': 0.12229561805725098, 'timestamp': '2025-10-01 04:36:01.703464', 'step': 13901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:01.771100', 'step': 13901, 'epoch': 2} {'type': 'loss', 'content': 0.11337463557720184, 'timestamp': '2025-10-01 04:36:01.773334', 'step': 13902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:01.826501', 'step': 13902, 'epoch': 2} {'type': 'loss', 'content': 0.07502777129411697, 'timestamp': '2025-10-01 04:36:01.829394', 'step': 13903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:01.882570', 'step': 13903, 'epoch': 2} {'type': 'loss', 'content': 0.12564672529697418, 'timestamp': '2025-10-01 04:36:01.888543', 'step': 13904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:01.940641', 'step': 13904, 'epoch': 2} {'type': 'loss', 'content': 0.17003613710403442, 'timestamp': '2025-10-01 04:36:01.943015', 'step': 13905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:01.996182', 'step': 13905, 'epoch': 2} {'type': 'loss', 'content': 0.19674305617809296, 'timestamp': '2025-10-01 04:36:01.998888', 'step': 13906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:02.051824', 'step': 13906, 'epoch': 2} {'type': 'loss', 'content': 0.1567045897245407, 'timestamp': '2025-10-01 04:36:02.054021', 'step': 13907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:02.108440', 'step': 13907, 'epoch': 2} {'type': 'loss', 'content': 0.16785043478012085, 'timestamp': '2025-10-01 04:36:02.114380', 'step': 13908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:02.168755', 'step': 13908, 'epoch': 2} {'type': 'loss', 'content': 0.09725569933652878, 'timestamp': '2025-10-01 04:36:02.171178', 'step': 13909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:36:02.225360', 'step': 13909, 'epoch': 2} {'type': 'loss', 'content': 0.08070685714483261, 'timestamp': '2025-10-01 04:36:02.227778', 'step': 13910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:02.281092', 'step': 13910, 'epoch': 2} {'type': 'loss', 'content': 0.07101520150899887, 'timestamp': '2025-10-01 04:36:02.283279', 'step': 13911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:02.337184', 'step': 13911, 'epoch': 2} {'type': 'loss', 'content': 0.12094128131866455, 'timestamp': '2025-10-01 04:36:02.345004', 'step': 13912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:02.402139', 'step': 13912, 'epoch': 2} {'type': 'loss', 'content': 0.10374581068754196, 'timestamp': '2025-10-01 04:36:02.406763', 'step': 13913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:02.463111', 'step': 13913, 'epoch': 2} {'type': 'loss', 'content': 0.10398560017347336, 'timestamp': '2025-10-01 04:36:02.466315', 'step': 13914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:36:02.521245', 'step': 13914, 'epoch': 2} {'type': 'loss', 'content': 0.10960100591182709, 'timestamp': '2025-10-01 04:36:02.523913', 'step': 13915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:36:02.577249', 'step': 13915, 'epoch': 2} {'type': 'loss', 'content': 0.12106394022703171, 'timestamp': '2025-10-01 04:36:02.586044', 'step': 13916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:02.643256', 'step': 13916, 'epoch': 2} {'type': 'loss', 'content': 0.11965130269527435, 'timestamp': '2025-10-01 04:36:02.646338', 'step': 13917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:02.699961', 'step': 13917, 'epoch': 2} {'type': 'loss', 'content': 0.0607370100915432, 'timestamp': '2025-10-01 04:36:02.706898', 'step': 13918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:02.760243', 'step': 13918, 'epoch': 2} {'type': 'loss', 'content': 0.16289401054382324, 'timestamp': '2025-10-01 04:36:02.762873', 'step': 13919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:02.817753', 'step': 13919, 'epoch': 2} {'type': 'loss', 'content': 0.08591458946466446, 'timestamp': '2025-10-01 04:36:02.823896', 'step': 13920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:02.877431', 'step': 13920, 'epoch': 2} {'type': 'loss', 'content': 0.10954494029283524, 'timestamp': '2025-10-01 04:36:02.882494', 'step': 13921, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:36:02.939896', 'step': 13921, 'epoch': 2} {'type': 'loss', 'content': 0.05748622119426727, 'timestamp': '2025-10-01 04:36:02.943058', 'step': 13922, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:36:02.998636', 'step': 13922, 'epoch': 2} {'type': 'loss', 'content': 0.1458168625831604, 'timestamp': '2025-10-01 04:36:03.001348', 'step': 13923, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:03.058338', 'step': 13923, 'epoch': 2} {'type': 'loss', 'content': 0.13444557785987854, 'timestamp': '2025-10-01 04:36:03.065145', 'step': 13924, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:03.119951', 'step': 13924, 'epoch': 2} {'type': 'loss', 'content': 0.14685313403606415, 'timestamp': '2025-10-01 04:36:03.128485', 'step': 13925, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:03.181373', 'step': 13925, 'epoch': 2} {'type': 'loss', 'content': 0.05743608623743057, 'timestamp': '2025-10-01 04:36:03.183690', 'step': 13926, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:03.237200', 'step': 13926, 'epoch': 2} {'type': 'loss', 'content': 0.07531706243753433, 'timestamp': '2025-10-01 04:36:03.246551', 'step': 13927, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:03.308604', 'step': 13927, 'epoch': 2} {'type': 'loss', 'content': 0.1974179744720459, 'timestamp': '2025-10-01 04:36:03.314504', 'step': 13928, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:03.367776', 'step': 13928, 'epoch': 2} {'type': 'loss', 'content': 0.10822585225105286, 'timestamp': '2025-10-01 04:36:03.369924', 'step': 13929, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:03.424716', 'step': 13929, 'epoch': 2} {'type': 'loss', 'content': 0.0612022802233696, 'timestamp': '2025-10-01 04:36:03.426954', 'step': 13930, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:03.481689', 'step': 13930, 'epoch': 2} {'type': 'loss', 'content': 0.13309769332408905, 'timestamp': '2025-10-01 04:36:03.483908', 'step': 13931, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:03.538372', 'step': 13931, 'epoch': 2} {'type': 'loss', 'content': 0.0619347020983696, 'timestamp': '2025-10-01 04:36:03.544913', 'step': 13932, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:03.599182', 'step': 13932, 'epoch': 2} {'type': 'loss', 'content': 0.10420192033052444, 'timestamp': '2025-10-01 04:36:03.602040', 'step': 13933, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:03.659351', 'step': 13933, 'epoch': 2} {'type': 'loss', 'content': 0.09911464154720306, 'timestamp': '2025-10-01 04:36:03.661834', 'step': 13934, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:36:03.715719', 'step': 13934, 'epoch': 2} {'type': 'loss', 'content': 0.14842519164085388, 'timestamp': '2025-10-01 04:36:03.718237', 'step': 13935, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:03.771944', 'step': 13935, 'epoch': 2} {'type': 'loss', 'content': 0.09498506039381027, 'timestamp': '2025-10-01 04:36:03.778107', 'step': 13936, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:03.831571', 'step': 13936, 'epoch': 2} {'type': 'loss', 'content': 0.08935911953449249, 'timestamp': '2025-10-01 04:36:03.834343', 'step': 13937, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:03.888102', 'step': 13937, 'epoch': 2} {'type': 'loss', 'content': 0.1014004573225975, 'timestamp': '2025-10-01 04:36:03.890353', 'step': 13938, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:03.943721', 'step': 13938, 'epoch': 2} {'type': 'loss', 'content': 0.1462687999010086, 'timestamp': '2025-10-01 04:36:03.945946', 'step': 13939, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:03.999299', 'step': 13939, 'epoch': 2} {'type': 'loss', 'content': 0.12643305957317352, 'timestamp': '2025-10-01 04:36:04.005418', 'step': 13940, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:04.057928', 'step': 13940, 'epoch': 2} {'type': 'loss', 'content': 0.14680950343608856, 'timestamp': '2025-10-01 04:36:04.060378', 'step': 13941, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:04.114707', 'step': 13941, 'epoch': 2} {'type': 'loss', 'content': 0.08261916041374207, 'timestamp': '2025-10-01 04:36:04.117248', 'step': 13942, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:04.175626', 'step': 13942, 'epoch': 2} {'type': 'loss', 'content': 0.12051402032375336, 'timestamp': '2025-10-01 04:36:04.178702', 'step': 13943, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:04.234140', 'step': 13943, 'epoch': 2} {'type': 'loss', 'content': 0.10538006573915482, 'timestamp': '2025-10-01 04:36:04.240237', 'step': 13944, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:04.295017', 'step': 13944, 'epoch': 2} {'type': 'loss', 'content': 0.1396806389093399, 'timestamp': '2025-10-01 04:36:04.297774', 'step': 13945, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:36:04.352535', 'step': 13945, 'epoch': 2} {'type': 'loss', 'content': 0.10925549268722534, 'timestamp': '2025-10-01 04:36:04.355493', 'step': 13946, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:04.411196', 'step': 13946, 'epoch': 2} {'type': 'loss', 'content': 0.1592615246772766, 'timestamp': '2025-10-01 04:36:04.413849', 'step': 13947, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:36:04.468642', 'step': 13947, 'epoch': 2} {'type': 'loss', 'content': 0.09524331241846085, 'timestamp': '2025-10-01 04:36:04.481546', 'step': 13948, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:04.535105', 'step': 13948, 'epoch': 2} {'type': 'loss', 'content': 0.11442182958126068, 'timestamp': '2025-10-01 04:36:04.537690', 'step': 13949, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:04.592378', 'step': 13949, 'epoch': 2} {'type': 'loss', 'content': 0.09189784526824951, 'timestamp': '2025-10-01 04:36:04.594997', 'step': 13950, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:04.649918', 'step': 13950, 'epoch': 2} {'type': 'loss', 'content': 0.08024387806653976, 'timestamp': '2025-10-01 04:36:04.652416', 'step': 13951, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:36:04.707226', 'step': 13951, 'epoch': 2} {'type': 'loss', 'content': 0.10290689021348953, 'timestamp': '2025-10-01 04:36:04.713642', 'step': 13952, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:04.769227', 'step': 13952, 'epoch': 2} {'type': 'loss', 'content': 0.16682983934879303, 'timestamp': '2025-10-01 04:36:04.771521', 'step': 13953, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:04.826302', 'step': 13953, 'epoch': 2} {'type': 'loss', 'content': 0.10211914777755737, 'timestamp': '2025-10-01 04:36:04.828706', 'step': 13954, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:04.882898', 'step': 13954, 'epoch': 2} {'type': 'loss', 'content': 0.10869625210762024, 'timestamp': '2025-10-01 04:36:04.885576', 'step': 13955, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:04.940683', 'step': 13955, 'epoch': 2} {'type': 'loss', 'content': 0.08057401329278946, 'timestamp': '2025-10-01 04:36:04.946439', 'step': 13956, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:05.000422', 'step': 13956, 'epoch': 2} {'type': 'loss', 'content': 0.12630344927310944, 'timestamp': '2025-10-01 04:36:05.002734', 'step': 13957, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:05.057127', 'step': 13957, 'epoch': 2} {'type': 'loss', 'content': 0.1307111233472824, 'timestamp': '2025-10-01 04:36:05.060251', 'step': 13958, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:05.113889', 'step': 13958, 'epoch': 2} {'type': 'loss', 'content': 0.14533790946006775, 'timestamp': '2025-10-01 04:36:05.121791', 'step': 13959, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:36:05.176699', 'step': 13959, 'epoch': 2} {'type': 'loss', 'content': 0.21649929881095886, 'timestamp': '2025-10-01 04:36:05.183161', 'step': 13960, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:05.238541', 'step': 13960, 'epoch': 2} {'type': 'loss', 'content': 0.15716609358787537, 'timestamp': '2025-10-01 04:36:05.240931', 'step': 13961, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:36:05.296275', 'step': 13961, 'epoch': 2} {'type': 'loss', 'content': 0.150570809841156, 'timestamp': '2025-10-01 04:36:05.299309', 'step': 13962, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:05.353925', 'step': 13962, 'epoch': 2} {'type': 'loss', 'content': 0.13962993025779724, 'timestamp': '2025-10-01 04:36:05.357038', 'step': 13963, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:05.411240', 'step': 13963, 'epoch': 2} {'type': 'loss', 'content': 0.15517303347587585, 'timestamp': '2025-10-01 04:36:05.417412', 'step': 13964, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:05.470629', 'step': 13964, 'epoch': 2} {'type': 'loss', 'content': 0.0918266698718071, 'timestamp': '2025-10-01 04:36:05.473079', 'step': 13965, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:05.526555', 'step': 13965, 'epoch': 2} {'type': 'loss', 'content': 0.0889061689376831, 'timestamp': '2025-10-01 04:36:05.529083', 'step': 13966, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:05.584942', 'step': 13966, 'epoch': 2} {'type': 'loss', 'content': 0.11011556535959244, 'timestamp': '2025-10-01 04:36:05.587198', 'step': 13967, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:05.640796', 'step': 13967, 'epoch': 2} {'type': 'loss', 'content': 0.1869693398475647, 'timestamp': '2025-10-01 04:36:05.646631', 'step': 13968, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:05.699508', 'step': 13968, 'epoch': 2} {'type': 'loss', 'content': 0.15250824391841888, 'timestamp': '2025-10-01 04:36:05.701686', 'step': 13969, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:05.755269', 'step': 13969, 'epoch': 2} {'type': 'loss', 'content': 0.08697563409805298, 'timestamp': '2025-10-01 04:36:05.757959', 'step': 13970, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:05.811127', 'step': 13970, 'epoch': 2} {'type': 'loss', 'content': 0.12543292343616486, 'timestamp': '2025-10-01 04:36:05.813344', 'step': 13971, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:05.866608', 'step': 13971, 'epoch': 2} {'type': 'loss', 'content': 0.1547241061925888, 'timestamp': '2025-10-01 04:36:05.872368', 'step': 13972, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:05.924793', 'step': 13972, 'epoch': 2} {'type': 'loss', 'content': 0.07045555859804153, 'timestamp': '2025-10-01 04:36:05.927001', 'step': 13973, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:05.980001', 'step': 13973, 'epoch': 2} {'type': 'loss', 'content': 0.14755207300186157, 'timestamp': '2025-10-01 04:36:05.982122', 'step': 13974, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:06.035332', 'step': 13974, 'epoch': 2} {'type': 'loss', 'content': 0.10160497575998306, 'timestamp': '2025-10-01 04:36:06.037659', 'step': 13975, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:06.091460', 'step': 13975, 'epoch': 2} {'type': 'loss', 'content': 0.15985216200351715, 'timestamp': '2025-10-01 04:36:06.097269', 'step': 13976, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:06.149851', 'step': 13976, 'epoch': 2} {'type': 'loss', 'content': 0.09016337990760803, 'timestamp': '2025-10-01 04:36:06.152333', 'step': 13977, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:06.205731', 'step': 13977, 'epoch': 2} {'type': 'loss', 'content': 0.1375478059053421, 'timestamp': '2025-10-01 04:36:06.210227', 'step': 13978, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:06.263792', 'step': 13978, 'epoch': 2} {'type': 'loss', 'content': 0.18184584379196167, 'timestamp': '2025-10-01 04:36:06.266019', 'step': 13979, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:06.319339', 'step': 13979, 'epoch': 2} {'type': 'loss', 'content': 0.07066765427589417, 'timestamp': '2025-10-01 04:36:06.325164', 'step': 13980, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-01 04:36:19.478162', 'step': 13980, 'epoch': 2} {'type': 'pplx', 'content': 10759.666836063645, 'timestamp': '2025-10-01 04:36:19.480935', 'step': 13980, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:19.533712', 'step': 13980, 'epoch': 2} {'type': 'loss', 'content': 0.08764184266328812, 'timestamp': '2025-10-01 04:36:19.535825', 'step': 13981, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:19.590178', 'step': 13981, 'epoch': 2} {'type': 'loss', 'content': 0.12566061317920685, 'timestamp': '2025-10-01 04:36:19.592348', 'step': 13982, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:19.645784', 'step': 13982, 'epoch': 2} {'type': 'loss', 'content': 0.1022576093673706, 'timestamp': '2025-10-01 04:36:19.647923', 'step': 13983, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:19.701292', 'step': 13983, 'epoch': 2} {'type': 'loss', 'content': 0.1506766527891159, 'timestamp': '2025-10-01 04:36:19.717037', 'step': 13984, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:19.770638', 'step': 13984, 'epoch': 2} {'type': 'loss', 'content': 0.09335709363222122, 'timestamp': '2025-10-01 04:36:19.772662', 'step': 13985, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:19.825252', 'step': 13985, 'epoch': 2} {'type': 'loss', 'content': 0.05483272299170494, 'timestamp': '2025-10-01 04:36:19.827297', 'step': 13986, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:19.882527', 'step': 13986, 'epoch': 2} {'type': 'loss', 'content': 0.16435939073562622, 'timestamp': '2025-10-01 04:36:19.891854', 'step': 13987, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:19.945212', 'step': 13987, 'epoch': 2} {'type': 'loss', 'content': 0.06794432550668716, 'timestamp': '2025-10-01 04:36:19.954897', 'step': 13988, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:20.008004', 'step': 13988, 'epoch': 2} {'type': 'loss', 'content': 0.11918415129184723, 'timestamp': '2025-10-01 04:36:20.010265', 'step': 13989, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:20.080176', 'step': 13989, 'epoch': 2} {'type': 'loss', 'content': 0.2323056310415268, 'timestamp': '2025-10-01 04:36:20.082250', 'step': 13990, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:20.162587', 'step': 13990, 'epoch': 2} {'type': 'loss', 'content': 0.14112943410873413, 'timestamp': '2025-10-01 04:36:20.164927', 'step': 13991, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:20.219721', 'step': 13991, 'epoch': 2} {'type': 'loss', 'content': 0.10108312964439392, 'timestamp': '2025-10-01 04:36:20.225327', 'step': 13992, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:36:20.279495', 'step': 13992, 'epoch': 2} {'type': 'loss', 'content': 0.10144844651222229, 'timestamp': '2025-10-01 04:36:20.281843', 'step': 13993, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:20.339815', 'step': 13993, 'epoch': 2} {'type': 'loss', 'content': 0.119356170296669, 'timestamp': '2025-10-01 04:36:20.342045', 'step': 13994, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:20.399113', 'step': 13994, 'epoch': 2} {'type': 'loss', 'content': 0.04594111070036888, 'timestamp': '2025-10-01 04:36:20.400758', 'step': 13995, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:20.457857', 'step': 13995, 'epoch': 2} {'type': 'loss', 'content': 0.12136711925268173, 'timestamp': '2025-10-01 04:36:20.464176', 'step': 13996, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:20.516640', 'step': 13996, 'epoch': 2} {'type': 'loss', 'content': 0.04526916891336441, 'timestamp': '2025-10-01 04:36:20.524171', 'step': 13997, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:20.577408', 'step': 13997, 'epoch': 2} {'type': 'loss', 'content': 0.12565170228481293, 'timestamp': '2025-10-01 04:36:20.579535', 'step': 13998, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:20.632709', 'step': 13998, 'epoch': 2} {'type': 'loss', 'content': 0.14707602560520172, 'timestamp': '2025-10-01 04:36:20.644413', 'step': 13999, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:20.711805', 'step': 13999, 'epoch': 2} {'type': 'loss', 'content': 0.10627275705337524, 'timestamp': '2025-10-01 04:36:20.729873', 'step': 14000, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 14000', 'timestamp': '2025-10-01 04:36:21.110613', 'step': 14000, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:21.166173', 'step': 14000, 'epoch': 2} {'type': 'loss', 'content': 0.12846386432647705, 'timestamp': '2025-10-01 04:36:21.170103', 'step': 14001, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:21.225521', 'step': 14001, 'epoch': 2} {'type': 'loss', 'content': 0.10688534379005432, 'timestamp': '2025-10-01 04:36:21.227893', 'step': 14002, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:36:21.282441', 'step': 14002, 'epoch': 2} {'type': 'loss', 'content': 0.13890361785888672, 'timestamp': '2025-10-01 04:36:21.284893', 'step': 14003, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:21.340211', 'step': 14003, 'epoch': 2} {'type': 'loss', 'content': 0.12122400104999542, 'timestamp': '2025-10-01 04:36:21.346601', 'step': 14004, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:21.400757', 'step': 14004, 'epoch': 2} {'type': 'loss', 'content': 0.06446333974599838, 'timestamp': '2025-10-01 04:36:21.403247', 'step': 14005, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:36:21.457540', 'step': 14005, 'epoch': 2} {'type': 'loss', 'content': 0.08370838314294815, 'timestamp': '2025-10-01 04:36:21.459941', 'step': 14006, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:21.514599', 'step': 14006, 'epoch': 2} {'type': 'loss', 'content': 0.03369873762130737, 'timestamp': '2025-10-01 04:36:21.516769', 'step': 14007, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:21.571425', 'step': 14007, 'epoch': 2} {'type': 'loss', 'content': 0.09483089298009872, 'timestamp': '2025-10-01 04:36:21.577610', 'step': 14008, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:21.631559', 'step': 14008, 'epoch': 2} {'type': 'loss', 'content': 0.12197982519865036, 'timestamp': '2025-10-01 04:36:21.634153', 'step': 14009, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:21.701936', 'step': 14009, 'epoch': 2} {'type': 'loss', 'content': 0.16185955703258514, 'timestamp': '2025-10-01 04:36:21.704151', 'step': 14010, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:21.758110', 'step': 14010, 'epoch': 2} {'type': 'loss', 'content': 0.16148020327091217, 'timestamp': '2025-10-01 04:36:21.760281', 'step': 14011, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:21.814652', 'step': 14011, 'epoch': 2} {'type': 'loss', 'content': 0.10478386282920837, 'timestamp': '2025-10-01 04:36:21.821437', 'step': 14012, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:21.881594', 'step': 14012, 'epoch': 2} {'type': 'loss', 'content': 0.13601696491241455, 'timestamp': '2025-10-01 04:36:21.884129', 'step': 14013, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:21.937467', 'step': 14013, 'epoch': 2} {'type': 'loss', 'content': 0.022931301966309547, 'timestamp': '2025-10-01 04:36:21.941542', 'step': 14014, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:21.998971', 'step': 14014, 'epoch': 2} {'type': 'loss', 'content': 0.11797231435775757, 'timestamp': '2025-10-01 04:36:22.001706', 'step': 14015, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:36:22.066422', 'step': 14015, 'epoch': 2} {'type': 'loss', 'content': 0.20553196966648102, 'timestamp': '2025-10-01 04:36:22.072627', 'step': 14016, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:36:22.129831', 'step': 14016, 'epoch': 2} {'type': 'loss', 'content': 0.08720996975898743, 'timestamp': '2025-10-01 04:36:22.132213', 'step': 14017, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:36:22.188105', 'step': 14017, 'epoch': 2} {'type': 'loss', 'content': 0.09768690168857574, 'timestamp': '2025-10-01 04:36:22.190227', 'step': 14018, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:22.243938', 'step': 14018, 'epoch': 2} {'type': 'loss', 'content': 0.16261477768421173, 'timestamp': '2025-10-01 04:36:22.246052', 'step': 14019, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:22.312122', 'step': 14019, 'epoch': 2} {'type': 'loss', 'content': 0.10105488449335098, 'timestamp': '2025-10-01 04:36:22.317839', 'step': 14020, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:22.371123', 'step': 14020, 'epoch': 2} {'type': 'loss', 'content': 0.11963297426700592, 'timestamp': '2025-10-01 04:36:22.374025', 'step': 14021, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:22.429163', 'step': 14021, 'epoch': 2} {'type': 'loss', 'content': 0.11474819481372833, 'timestamp': '2025-10-01 04:36:22.431318', 'step': 14022, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:22.484536', 'step': 14022, 'epoch': 2} {'type': 'loss', 'content': 0.14859189093112946, 'timestamp': '2025-10-01 04:36:22.489664', 'step': 14023, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:22.548759', 'step': 14023, 'epoch': 2} {'type': 'loss', 'content': 0.16532844305038452, 'timestamp': '2025-10-01 04:36:22.554862', 'step': 14024, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:36:22.609770', 'step': 14024, 'epoch': 2} {'type': 'loss', 'content': 0.1011594608426094, 'timestamp': '2025-10-01 04:36:22.611717', 'step': 14025, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:22.670438', 'step': 14025, 'epoch': 2} {'type': 'loss', 'content': 0.10932467132806778, 'timestamp': '2025-10-01 04:36:22.673017', 'step': 14026, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:22.726908', 'step': 14026, 'epoch': 2} {'type': 'loss', 'content': 0.10637973248958588, 'timestamp': '2025-10-01 04:36:22.728976', 'step': 14027, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:22.784855', 'step': 14027, 'epoch': 2} {'type': 'loss', 'content': 0.12598766386508942, 'timestamp': '2025-10-01 04:36:22.792073', 'step': 14028, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:22.845652', 'step': 14028, 'epoch': 2} {'type': 'loss', 'content': 0.11814974248409271, 'timestamp': '2025-10-01 04:36:22.847922', 'step': 14029, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:36:22.908644', 'step': 14029, 'epoch': 2} {'type': 'loss', 'content': 0.1971268355846405, 'timestamp': '2025-10-01 04:36:22.914353', 'step': 14030, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:36:22.971495', 'step': 14030, 'epoch': 2} {'type': 'loss', 'content': 0.11817241460084915, 'timestamp': '2025-10-01 04:36:22.973585', 'step': 14031, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:36:23.027419', 'step': 14031, 'epoch': 2} {'type': 'loss', 'content': 0.11962958425283432, 'timestamp': '2025-10-01 04:36:23.032992', 'step': 14032, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:23.085639', 'step': 14032, 'epoch': 2} {'type': 'loss', 'content': 0.07399642467498779, 'timestamp': '2025-10-01 04:36:23.087748', 'step': 14033, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:23.140670', 'step': 14033, 'epoch': 2} {'type': 'loss', 'content': 0.1146036833524704, 'timestamp': '2025-10-01 04:36:23.142823', 'step': 14034, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:23.196077', 'step': 14034, 'epoch': 2} {'type': 'loss', 'content': 0.15216705203056335, 'timestamp': '2025-10-01 04:36:23.198196', 'step': 14035, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:36:23.252260', 'step': 14035, 'epoch': 2} {'type': 'loss', 'content': 0.09049917012453079, 'timestamp': '2025-10-01 04:36:23.257881', 'step': 14036, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:36:23.310574', 'step': 14036, 'epoch': 2} {'type': 'loss', 'content': 0.07432195544242859, 'timestamp': '2025-10-01 04:36:23.318237', 'step': 14037, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:23.371485', 'step': 14037, 'epoch': 2} {'type': 'loss', 'content': 0.11199964582920074, 'timestamp': '2025-10-01 04:36:23.376414', 'step': 14038, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:23.434326', 'step': 14038, 'epoch': 2} {'type': 'loss', 'content': 0.10160090029239655, 'timestamp': '2025-10-01 04:36:23.437455', 'step': 14039, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:23.490617', 'step': 14039, 'epoch': 2} {'type': 'loss', 'content': 0.1524810492992401, 'timestamp': '2025-10-01 04:36:23.496234', 'step': 14040, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:23.558199', 'step': 14040, 'epoch': 2} {'type': 'loss', 'content': 0.15936589241027832, 'timestamp': '2025-10-01 04:36:23.560301', 'step': 14041, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:23.613110', 'step': 14041, 'epoch': 2} {'type': 'loss', 'content': 0.07471398264169693, 'timestamp': '2025-10-01 04:36:23.615407', 'step': 14042, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:23.670465', 'step': 14042, 'epoch': 2} {'type': 'loss', 'content': 0.0741535946726799, 'timestamp': '2025-10-01 04:36:23.672826', 'step': 14043, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:23.725949', 'step': 14043, 'epoch': 2} {'type': 'loss', 'content': 0.16368743777275085, 'timestamp': '2025-10-01 04:36:23.731821', 'step': 14044, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:36:23.784770', 'step': 14044, 'epoch': 2} {'type': 'loss', 'content': 0.2092316895723343, 'timestamp': '2025-10-01 04:36:23.786967', 'step': 14045, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:23.840341', 'step': 14045, 'epoch': 2} {'type': 'loss', 'content': 0.04848962649703026, 'timestamp': '2025-10-01 04:36:23.842454', 'step': 14046, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:36:23.896347', 'step': 14046, 'epoch': 2} {'type': 'loss', 'content': 0.13228709995746613, 'timestamp': '2025-10-01 04:36:23.898902', 'step': 14047, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:23.952495', 'step': 14047, 'epoch': 2} {'type': 'loss', 'content': 0.13707353174686432, 'timestamp': '2025-10-01 04:36:23.958217', 'step': 14048, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:24.010847', 'step': 14048, 'epoch': 2} {'type': 'loss', 'content': 0.1281195431947708, 'timestamp': '2025-10-01 04:36:24.012851', 'step': 14049, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:36:24.065722', 'step': 14049, 'epoch': 2} {'type': 'loss', 'content': 0.10287440568208694, 'timestamp': '2025-10-01 04:36:24.068779', 'step': 14050, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:36:24.121828', 'step': 14050, 'epoch': 2} {'type': 'loss', 'content': 0.10637800395488739, 'timestamp': '2025-10-01 04:36:24.134417', 'step': 14051, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:36:24.189933', 'step': 14051, 'epoch': 2} {'type': 'loss', 'content': 0.1734997034072876, 'timestamp': '2025-10-01 04:36:24.195604', 'step': 14052, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:24.248567', 'step': 14052, 'epoch': 2} {'type': 'loss', 'content': 0.06585986167192459, 'timestamp': '2025-10-01 04:36:24.250658', 'step': 14053, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:24.303737', 'step': 14053, 'epoch': 2} {'type': 'loss', 'content': 0.1290181428194046, 'timestamp': '2025-10-01 04:36:24.305801', 'step': 14054, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:24.358571', 'step': 14054, 'epoch': 2} {'type': 'loss', 'content': 0.0905279740691185, 'timestamp': '2025-10-01 04:36:24.360767', 'step': 14055, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:36:24.414454', 'step': 14055, 'epoch': 2} {'type': 'loss', 'content': 0.08868397772312164, 'timestamp': '2025-10-01 04:36:24.420959', 'step': 14056, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:24.474282', 'step': 14056, 'epoch': 2} {'type': 'loss', 'content': 0.13248127698898315, 'timestamp': '2025-10-01 04:36:24.476614', 'step': 14057, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:24.532435', 'step': 14057, 'epoch': 2} {'type': 'loss', 'content': 0.03133159503340721, 'timestamp': '2025-10-01 04:36:24.534761', 'step': 14058, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:36:24.588615', 'step': 14058, 'epoch': 2} {'type': 'loss', 'content': 0.06079297512769699, 'timestamp': '2025-10-01 04:36:24.590698', 'step': 14059, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:24.644365', 'step': 14059, 'epoch': 2} {'type': 'loss', 'content': 0.08199764043092728, 'timestamp': '2025-10-01 04:36:24.650028', 'step': 14060, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:24.704551', 'step': 14060, 'epoch': 2} {'type': 'loss', 'content': 0.08183763921260834, 'timestamp': '2025-10-01 04:36:24.707217', 'step': 14061, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:24.760197', 'step': 14061, 'epoch': 2} {'type': 'loss', 'content': 0.09587801992893219, 'timestamp': '2025-10-01 04:36:24.762312', 'step': 14062, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:36:24.815807', 'step': 14062, 'epoch': 2} {'type': 'loss', 'content': 0.1703718602657318, 'timestamp': '2025-10-01 04:36:24.818117', 'step': 14063, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:24.871673', 'step': 14063, 'epoch': 2} {'type': 'loss', 'content': 0.22687146067619324, 'timestamp': '2025-10-01 04:36:24.877277', 'step': 14064, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:24.929531', 'step': 14064, 'epoch': 2} {'type': 'loss', 'content': 0.051486093550920486, 'timestamp': '2025-10-01 04:36:24.931641', 'step': 14065, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:24.984881', 'step': 14065, 'epoch': 2} {'type': 'loss', 'content': 0.08845195174217224, 'timestamp': '2025-10-01 04:36:24.986982', 'step': 14066, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:36:25.040362', 'step': 14066, 'epoch': 2} {'type': 'loss', 'content': 0.08878414332866669, 'timestamp': '2025-10-01 04:36:25.042483', 'step': 14067, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:25.095849', 'step': 14067, 'epoch': 2} {'type': 'loss', 'content': 0.09230835735797882, 'timestamp': '2025-10-01 04:36:25.101436', 'step': 14068, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:25.155042', 'step': 14068, 'epoch': 2} {'type': 'loss', 'content': 0.11792033165693283, 'timestamp': '2025-10-01 04:36:25.157190', 'step': 14069, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:25.210160', 'step': 14069, 'epoch': 2} {'type': 'loss', 'content': 0.18075574934482574, 'timestamp': '2025-10-01 04:36:25.212222', 'step': 14070, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:25.264992', 'step': 14070, 'epoch': 2} {'type': 'loss', 'content': 0.11372993141412735, 'timestamp': '2025-10-01 04:36:25.268129', 'step': 14071, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:25.321869', 'step': 14071, 'epoch': 2} {'type': 'loss', 'content': 0.11134660243988037, 'timestamp': '2025-10-01 04:36:25.327639', 'step': 14072, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:25.381027', 'step': 14072, 'epoch': 2} {'type': 'loss', 'content': 0.12303977459669113, 'timestamp': '2025-10-01 04:36:25.383155', 'step': 14073, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:25.437088', 'step': 14073, 'epoch': 2} {'type': 'loss', 'content': 0.14558400213718414, 'timestamp': '2025-10-01 04:36:25.439346', 'step': 14074, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:25.492773', 'step': 14074, 'epoch': 2} {'type': 'loss', 'content': 0.06271577626466751, 'timestamp': '2025-10-01 04:36:25.494999', 'step': 14075, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:25.548088', 'step': 14075, 'epoch': 2} {'type': 'loss', 'content': 0.14007659256458282, 'timestamp': '2025-10-01 04:36:25.553720', 'step': 14076, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:25.606261', 'step': 14076, 'epoch': 2} {'type': 'loss', 'content': 0.09852652996778488, 'timestamp': '2025-10-01 04:36:25.608177', 'step': 14077, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:25.662157', 'step': 14077, 'epoch': 2} {'type': 'loss', 'content': 0.037212979048490524, 'timestamp': '2025-10-01 04:36:25.664561', 'step': 14078, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:25.718188', 'step': 14078, 'epoch': 2} {'type': 'loss', 'content': 0.06351137906312943, 'timestamp': '2025-10-01 04:36:25.720444', 'step': 14079, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:25.774586', 'step': 14079, 'epoch': 2} {'type': 'loss', 'content': 0.08495853841304779, 'timestamp': '2025-10-01 04:36:25.780216', 'step': 14080, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:25.832904', 'step': 14080, 'epoch': 2} {'type': 'loss', 'content': 0.10444449633359909, 'timestamp': '2025-10-01 04:36:25.835102', 'step': 14081, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:25.891990', 'step': 14081, 'epoch': 2} {'type': 'loss', 'content': 0.10372402518987656, 'timestamp': '2025-10-01 04:36:25.894079', 'step': 14082, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:25.947263', 'step': 14082, 'epoch': 2} {'type': 'loss', 'content': 0.16644597053527832, 'timestamp': '2025-10-01 04:36:25.949463', 'step': 14083, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:26.002592', 'step': 14083, 'epoch': 2} {'type': 'loss', 'content': 0.058492064476013184, 'timestamp': '2025-10-01 04:36:26.008448', 'step': 14084, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:26.060770', 'step': 14084, 'epoch': 2} {'type': 'loss', 'content': 0.1698056310415268, 'timestamp': '2025-10-01 04:36:26.063058', 'step': 14085, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:26.116400', 'step': 14085, 'epoch': 2} {'type': 'loss', 'content': 0.12083733826875687, 'timestamp': '2025-10-01 04:36:26.118722', 'step': 14086, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:26.173562', 'step': 14086, 'epoch': 2} {'type': 'loss', 'content': 0.04734749719500542, 'timestamp': '2025-10-01 04:36:26.175815', 'step': 14087, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:26.229408', 'step': 14087, 'epoch': 2} {'type': 'loss', 'content': 0.11758797615766525, 'timestamp': '2025-10-01 04:36:26.235112', 'step': 14088, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:26.288293', 'step': 14088, 'epoch': 2} {'type': 'loss', 'content': 0.19959299266338348, 'timestamp': '2025-10-01 04:36:26.290528', 'step': 14089, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:26.344057', 'step': 14089, 'epoch': 2} {'type': 'loss', 'content': 0.10433588922023773, 'timestamp': '2025-10-01 04:36:26.346308', 'step': 14090, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:26.399381', 'step': 14090, 'epoch': 2} {'type': 'loss', 'content': 0.13622160255908966, 'timestamp': '2025-10-01 04:36:26.401496', 'step': 14091, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:26.454894', 'step': 14091, 'epoch': 2} {'type': 'loss', 'content': 0.08086797595024109, 'timestamp': '2025-10-01 04:36:26.460541', 'step': 14092, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:26.512666', 'step': 14092, 'epoch': 2} {'type': 'loss', 'content': 0.07001184672117233, 'timestamp': '2025-10-01 04:36:26.514827', 'step': 14093, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:26.567479', 'step': 14093, 'epoch': 2} {'type': 'loss', 'content': 0.09347893297672272, 'timestamp': '2025-10-01 04:36:26.569603', 'step': 14094, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:26.622872', 'step': 14094, 'epoch': 2} {'type': 'loss', 'content': 0.17701032757759094, 'timestamp': '2025-10-01 04:36:26.625006', 'step': 14095, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:26.678215', 'step': 14095, 'epoch': 2} {'type': 'loss', 'content': 0.0983518660068512, 'timestamp': '2025-10-01 04:36:26.683921', 'step': 14096, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:26.736788', 'step': 14096, 'epoch': 2} {'type': 'loss', 'content': 0.024296019226312637, 'timestamp': '2025-10-01 04:36:26.738770', 'step': 14097, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:26.792095', 'step': 14097, 'epoch': 2} {'type': 'loss', 'content': 0.09085346013307571, 'timestamp': '2025-10-01 04:36:26.794160', 'step': 14098, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:26.846990', 'step': 14098, 'epoch': 2} {'type': 'loss', 'content': 0.13259994983673096, 'timestamp': '2025-10-01 04:36:26.849245', 'step': 14099, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:26.904106', 'step': 14099, 'epoch': 2} {'type': 'loss', 'content': 0.1340545117855072, 'timestamp': '2025-10-01 04:36:26.909913', 'step': 14100, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:26.962964', 'step': 14100, 'epoch': 2} {'type': 'loss', 'content': 0.1451166719198227, 'timestamp': '2025-10-01 04:36:26.965306', 'step': 14101, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:27.018177', 'step': 14101, 'epoch': 2} {'type': 'loss', 'content': 0.06486847251653671, 'timestamp': '2025-10-01 04:36:27.020251', 'step': 14102, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:36:27.073787', 'step': 14102, 'epoch': 2} {'type': 'loss', 'content': 0.17877215147018433, 'timestamp': '2025-10-01 04:36:27.076326', 'step': 14103, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:27.129804', 'step': 14103, 'epoch': 2} {'type': 'loss', 'content': 0.13431836664676666, 'timestamp': '2025-10-01 04:36:27.135527', 'step': 14104, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:27.188233', 'step': 14104, 'epoch': 2} {'type': 'loss', 'content': 0.06665464490652084, 'timestamp': '2025-10-01 04:36:27.191037', 'step': 14105, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:27.244100', 'step': 14105, 'epoch': 2} {'type': 'loss', 'content': 0.07342259585857391, 'timestamp': '2025-10-01 04:36:27.246172', 'step': 14106, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:36:27.299429', 'step': 14106, 'epoch': 2} {'type': 'loss', 'content': 0.03181732818484306, 'timestamp': '2025-10-01 04:36:27.301942', 'step': 14107, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:27.358016', 'step': 14107, 'epoch': 2} {'type': 'loss', 'content': 0.08276362717151642, 'timestamp': '2025-10-01 04:36:27.364590', 'step': 14108, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:36:27.418901', 'step': 14108, 'epoch': 2} {'type': 'loss', 'content': 0.19301125407218933, 'timestamp': '2025-10-01 04:36:27.421067', 'step': 14109, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:27.475658', 'step': 14109, 'epoch': 2} {'type': 'loss', 'content': 0.12812450528144836, 'timestamp': '2025-10-01 04:36:27.477845', 'step': 14110, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:27.532677', 'step': 14110, 'epoch': 2} {'type': 'loss', 'content': 0.05657804012298584, 'timestamp': '2025-10-01 04:36:27.535095', 'step': 14111, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:36:27.588741', 'step': 14111, 'epoch': 2} {'type': 'loss', 'content': 0.11829346418380737, 'timestamp': '2025-10-01 04:36:27.595139', 'step': 14112, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:27.649207', 'step': 14112, 'epoch': 2} {'type': 'loss', 'content': 0.13793350756168365, 'timestamp': '2025-10-01 04:36:27.651768', 'step': 14113, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:27.705410', 'step': 14113, 'epoch': 2} {'type': 'loss', 'content': 0.08810234814882278, 'timestamp': '2025-10-01 04:36:27.707643', 'step': 14114, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:27.762547', 'step': 14114, 'epoch': 2} {'type': 'loss', 'content': 0.15363216400146484, 'timestamp': '2025-10-01 04:36:27.764868', 'step': 14115, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:27.819786', 'step': 14115, 'epoch': 2} {'type': 'loss', 'content': 0.07387725263834, 'timestamp': '2025-10-01 04:36:27.825967', 'step': 14116, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:27.880100', 'step': 14116, 'epoch': 2} {'type': 'loss', 'content': 0.09330498427152634, 'timestamp': '2025-10-01 04:36:27.882519', 'step': 14117, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:36:27.938504', 'step': 14117, 'epoch': 2} {'type': 'loss', 'content': 0.21822622418403625, 'timestamp': '2025-10-01 04:36:27.940633', 'step': 14118, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:27.993831', 'step': 14118, 'epoch': 2} {'type': 'loss', 'content': 0.0612034797668457, 'timestamp': '2025-10-01 04:36:27.996035', 'step': 14119, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:28.049388', 'step': 14119, 'epoch': 2} {'type': 'loss', 'content': 0.15321573615074158, 'timestamp': '2025-10-01 04:36:28.055271', 'step': 14120, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:28.107878', 'step': 14120, 'epoch': 2} {'type': 'loss', 'content': 0.03301329165697098, 'timestamp': '2025-10-01 04:36:28.110038', 'step': 14121, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:36:28.165055', 'step': 14121, 'epoch': 2} {'type': 'loss', 'content': 0.07962483912706375, 'timestamp': '2025-10-01 04:36:28.167131', 'step': 14122, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:28.220411', 'step': 14122, 'epoch': 2} {'type': 'loss', 'content': 0.09505531936883926, 'timestamp': '2025-10-01 04:36:28.232392', 'step': 14123, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:28.285685', 'step': 14123, 'epoch': 2} {'type': 'loss', 'content': 0.12198419123888016, 'timestamp': '2025-10-01 04:36:28.291575', 'step': 14124, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:28.350164', 'step': 14124, 'epoch': 2} {'type': 'loss', 'content': 0.03401799872517586, 'timestamp': '2025-10-01 04:36:28.352905', 'step': 14125, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:28.405898', 'step': 14125, 'epoch': 2} {'type': 'loss', 'content': 0.0649413987994194, 'timestamp': '2025-10-01 04:36:28.408047', 'step': 14126, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:28.461988', 'step': 14126, 'epoch': 2} {'type': 'loss', 'content': 0.28484907746315, 'timestamp': '2025-10-01 04:36:28.464747', 'step': 14127, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:28.518492', 'step': 14127, 'epoch': 2} {'type': 'loss', 'content': 0.12369900196790695, 'timestamp': '2025-10-01 04:36:28.524231', 'step': 14128, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:28.579566', 'step': 14128, 'epoch': 2} {'type': 'loss', 'content': 0.11482730507850647, 'timestamp': '2025-10-01 04:36:28.581904', 'step': 14129, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:28.635673', 'step': 14129, 'epoch': 2} {'type': 'loss', 'content': 0.1015116274356842, 'timestamp': '2025-10-01 04:36:28.637978', 'step': 14130, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:28.703495', 'step': 14130, 'epoch': 2} {'type': 'loss', 'content': 0.01128257904201746, 'timestamp': '2025-10-01 04:36:28.705610', 'step': 14131, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:28.762182', 'step': 14131, 'epoch': 2} {'type': 'loss', 'content': 0.07482772320508957, 'timestamp': '2025-10-01 04:36:28.768129', 'step': 14132, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:28.821728', 'step': 14132, 'epoch': 2} {'type': 'loss', 'content': 0.11990658938884735, 'timestamp': '2025-10-01 04:36:28.823959', 'step': 14133, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:28.879417', 'step': 14133, 'epoch': 2} {'type': 'loss', 'content': 0.19037769734859467, 'timestamp': '2025-10-01 04:36:28.881734', 'step': 14134, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:28.937182', 'step': 14134, 'epoch': 2} {'type': 'loss', 'content': 0.20399175584316254, 'timestamp': '2025-10-01 04:36:28.939340', 'step': 14135, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:28.994343', 'step': 14135, 'epoch': 2} {'type': 'loss', 'content': 0.08488573133945465, 'timestamp': '2025-10-01 04:36:29.000851', 'step': 14136, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:36:29.055580', 'step': 14136, 'epoch': 2} {'type': 'loss', 'content': 0.0595756359398365, 'timestamp': '2025-10-01 04:36:29.057850', 'step': 14137, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:29.117147', 'step': 14137, 'epoch': 2} {'type': 'loss', 'content': 0.09678993374109268, 'timestamp': '2025-10-01 04:36:29.119146', 'step': 14138, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:29.173868', 'step': 14138, 'epoch': 2} {'type': 'loss', 'content': 0.1975000947713852, 'timestamp': '2025-10-01 04:36:29.176038', 'step': 14139, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:36:29.234561', 'step': 14139, 'epoch': 2} {'type': 'loss', 'content': 0.10942145437002182, 'timestamp': '2025-10-01 04:36:29.241708', 'step': 14140, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:29.305495', 'step': 14140, 'epoch': 2} {'type': 'loss', 'content': 0.060941893607378006, 'timestamp': '2025-10-01 04:36:29.312090', 'step': 14141, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:29.384600', 'step': 14141, 'epoch': 2} {'type': 'loss', 'content': 0.09293698519468307, 'timestamp': '2025-10-01 04:36:29.386884', 'step': 14142, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:29.441959', 'step': 14142, 'epoch': 2} {'type': 'loss', 'content': 0.16976696252822876, 'timestamp': '2025-10-01 04:36:29.444237', 'step': 14143, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:29.501095', 'step': 14143, 'epoch': 2} {'type': 'loss', 'content': 0.16199630498886108, 'timestamp': '2025-10-01 04:36:29.508128', 'step': 14144, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:36:29.561334', 'step': 14144, 'epoch': 2} {'type': 'loss', 'content': 0.15157553553581238, 'timestamp': '2025-10-01 04:36:29.563779', 'step': 14145, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:29.618247', 'step': 14145, 'epoch': 2} {'type': 'loss', 'content': 0.05256464704871178, 'timestamp': '2025-10-01 04:36:29.620393', 'step': 14146, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:29.678166', 'step': 14146, 'epoch': 2} {'type': 'loss', 'content': 0.08830718696117401, 'timestamp': '2025-10-01 04:36:29.680683', 'step': 14147, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:29.734803', 'step': 14147, 'epoch': 2} {'type': 'loss', 'content': 0.11685657501220703, 'timestamp': '2025-10-01 04:36:29.740729', 'step': 14148, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:29.793554', 'step': 14148, 'epoch': 2} {'type': 'loss', 'content': 0.2145945280790329, 'timestamp': '2025-10-01 04:36:29.795643', 'step': 14149, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:36:29.853738', 'step': 14149, 'epoch': 2} {'type': 'loss', 'content': 0.20118403434753418, 'timestamp': '2025-10-01 04:36:29.856025', 'step': 14150, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:29.914346', 'step': 14150, 'epoch': 2} {'type': 'loss', 'content': 0.11530956625938416, 'timestamp': '2025-10-01 04:36:29.918280', 'step': 14151, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:29.980634', 'step': 14151, 'epoch': 2} {'type': 'loss', 'content': 0.0752294734120369, 'timestamp': '2025-10-01 04:36:29.986824', 'step': 14152, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:30.044166', 'step': 14152, 'epoch': 2} {'type': 'loss', 'content': 0.16417835652828217, 'timestamp': '2025-10-01 04:36:30.053103', 'step': 14153, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:30.114047', 'step': 14153, 'epoch': 2} {'type': 'loss', 'content': 0.08745746314525604, 'timestamp': '2025-10-01 04:36:30.119194', 'step': 14154, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:30.191857', 'step': 14154, 'epoch': 2} {'type': 'loss', 'content': 0.10753101110458374, 'timestamp': '2025-10-01 04:36:30.194019', 'step': 14155, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:36:30.249383', 'step': 14155, 'epoch': 2} {'type': 'loss', 'content': 0.1210867315530777, 'timestamp': '2025-10-01 04:36:30.255567', 'step': 14156, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:30.311037', 'step': 14156, 'epoch': 2} {'type': 'loss', 'content': 0.14080053567886353, 'timestamp': '2025-10-01 04:36:30.314492', 'step': 14157, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:30.367983', 'step': 14157, 'epoch': 2} {'type': 'loss', 'content': 0.1534818559885025, 'timestamp': '2025-10-01 04:36:30.370263', 'step': 14158, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:36:30.425159', 'step': 14158, 'epoch': 2} {'type': 'loss', 'content': 0.11579401791095734, 'timestamp': '2025-10-01 04:36:30.433334', 'step': 14159, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:30.488196', 'step': 14159, 'epoch': 2} {'type': 'loss', 'content': 0.08740744739770889, 'timestamp': '2025-10-01 04:36:30.494602', 'step': 14160, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:30.548206', 'step': 14160, 'epoch': 2} {'type': 'loss', 'content': 0.13820523023605347, 'timestamp': '2025-10-01 04:36:30.550697', 'step': 14161, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:30.608560', 'step': 14161, 'epoch': 2} {'type': 'loss', 'content': 0.12798814475536346, 'timestamp': '2025-10-01 04:36:30.611171', 'step': 14162, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:36:30.670833', 'step': 14162, 'epoch': 2} {'type': 'loss', 'content': 0.11681192368268967, 'timestamp': '2025-10-01 04:36:30.673813', 'step': 14163, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:30.727727', 'step': 14163, 'epoch': 2} {'type': 'loss', 'content': 0.0854375883936882, 'timestamp': '2025-10-01 04:36:30.733829', 'step': 14164, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:30.788061', 'step': 14164, 'epoch': 2} {'type': 'loss', 'content': 0.1778849959373474, 'timestamp': '2025-10-01 04:36:30.790342', 'step': 14165, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:36:30.847558', 'step': 14165, 'epoch': 2} {'type': 'loss', 'content': 0.12293211370706558, 'timestamp': '2025-10-01 04:36:30.850997', 'step': 14166, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:30.906030', 'step': 14166, 'epoch': 2} {'type': 'loss', 'content': 0.15744982659816742, 'timestamp': '2025-10-01 04:36:30.908478', 'step': 14167, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:30.962498', 'step': 14167, 'epoch': 2} {'type': 'loss', 'content': 0.146673783659935, 'timestamp': '2025-10-01 04:36:30.968697', 'step': 14168, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:31.022372', 'step': 14168, 'epoch': 2} {'type': 'loss', 'content': 0.037553489208221436, 'timestamp': '2025-10-01 04:36:31.024598', 'step': 14169, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:31.078570', 'step': 14169, 'epoch': 2} {'type': 'loss', 'content': 0.1656806319952011, 'timestamp': '2025-10-01 04:36:31.081018', 'step': 14170, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:36:31.135737', 'step': 14170, 'epoch': 2} {'type': 'loss', 'content': 0.19021978974342346, 'timestamp': '2025-10-01 04:36:31.138298', 'step': 14171, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:31.195555', 'step': 14171, 'epoch': 2} {'type': 'loss', 'content': 0.15944628417491913, 'timestamp': '2025-10-01 04:36:31.201950', 'step': 14172, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:31.256820', 'step': 14172, 'epoch': 2} {'type': 'loss', 'content': 0.0981416180729866, 'timestamp': '2025-10-01 04:36:31.259388', 'step': 14173, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:31.314204', 'step': 14173, 'epoch': 2} {'type': 'loss', 'content': 0.1341712772846222, 'timestamp': '2025-10-01 04:36:31.316767', 'step': 14174, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:36:31.371394', 'step': 14174, 'epoch': 2} {'type': 'loss', 'content': 0.08176866918802261, 'timestamp': '2025-10-01 04:36:31.373643', 'step': 14175, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:31.427597', 'step': 14175, 'epoch': 2} {'type': 'loss', 'content': 0.14839529991149902, 'timestamp': '2025-10-01 04:36:31.433325', 'step': 14176, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-01 04:36:31.486783', 'step': 14176, 'epoch': 2} {'type': 'loss', 'content': 0.12287198752164841, 'timestamp': '2025-10-01 04:36:31.489252', 'step': 14177, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:31.543167', 'step': 14177, 'epoch': 2} {'type': 'loss', 'content': 0.11108527332544327, 'timestamp': '2025-10-01 04:36:31.545668', 'step': 14178, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:36:31.600005', 'step': 14178, 'epoch': 2} {'type': 'loss', 'content': 0.13672947883605957, 'timestamp': '2025-10-01 04:36:31.602368', 'step': 14179, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:31.656818', 'step': 14179, 'epoch': 2} {'type': 'loss', 'content': 0.08403021097183228, 'timestamp': '2025-10-01 04:36:31.662756', 'step': 14180, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:31.716277', 'step': 14180, 'epoch': 2} {'type': 'loss', 'content': 0.11664046347141266, 'timestamp': '2025-10-01 04:36:31.718377', 'step': 14181, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:31.770908', 'step': 14181, 'epoch': 2} {'type': 'loss', 'content': 0.22814783453941345, 'timestamp': '2025-10-01 04:36:31.773044', 'step': 14182, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:31.826415', 'step': 14182, 'epoch': 2} {'type': 'loss', 'content': 0.14786648750305176, 'timestamp': '2025-10-01 04:36:31.828450', 'step': 14183, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:31.881354', 'step': 14183, 'epoch': 2} {'type': 'loss', 'content': 0.053302403539419174, 'timestamp': '2025-10-01 04:36:31.887019', 'step': 14184, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:31.939521', 'step': 14184, 'epoch': 2} {'type': 'loss', 'content': 0.07083449512720108, 'timestamp': '2025-10-01 04:36:31.941677', 'step': 14185, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:31.994946', 'step': 14185, 'epoch': 2} {'type': 'loss', 'content': 0.07071313261985779, 'timestamp': '2025-10-01 04:36:31.997502', 'step': 14186, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:32.050780', 'step': 14186, 'epoch': 2} {'type': 'loss', 'content': 0.09387322515249252, 'timestamp': '2025-10-01 04:36:32.053003', 'step': 14187, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:32.106171', 'step': 14187, 'epoch': 2} {'type': 'loss', 'content': 0.08774233609437943, 'timestamp': '2025-10-01 04:36:32.111807', 'step': 14188, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:32.164686', 'step': 14188, 'epoch': 2} {'type': 'loss', 'content': 0.10851135104894638, 'timestamp': '2025-10-01 04:36:32.166830', 'step': 14189, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:32.220261', 'step': 14189, 'epoch': 2} {'type': 'loss', 'content': 0.17552399635314941, 'timestamp': '2025-10-01 04:36:32.222285', 'step': 14190, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:32.275017', 'step': 14190, 'epoch': 2} {'type': 'loss', 'content': 0.1664751172065735, 'timestamp': '2025-10-01 04:36:32.278157', 'step': 14191, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:32.331396', 'step': 14191, 'epoch': 2} {'type': 'loss', 'content': 0.14290200173854828, 'timestamp': '2025-10-01 04:36:32.337069', 'step': 14192, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:32.389721', 'step': 14192, 'epoch': 2} {'type': 'loss', 'content': 0.11971007287502289, 'timestamp': '2025-10-01 04:36:32.391886', 'step': 14193, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:32.445266', 'step': 14193, 'epoch': 2} {'type': 'loss', 'content': 0.09543351083993912, 'timestamp': '2025-10-01 04:36:32.447296', 'step': 14194, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:32.501822', 'step': 14194, 'epoch': 2} {'type': 'loss', 'content': 0.035962723195552826, 'timestamp': '2025-10-01 04:36:32.504244', 'step': 14195, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:32.557170', 'step': 14195, 'epoch': 2} {'type': 'loss', 'content': 0.10835087299346924, 'timestamp': '2025-10-01 04:36:32.562938', 'step': 14196, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:32.615538', 'step': 14196, 'epoch': 2} {'type': 'loss', 'content': 0.13566409051418304, 'timestamp': '2025-10-01 04:36:32.617686', 'step': 14197, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:32.671526', 'step': 14197, 'epoch': 2} {'type': 'loss', 'content': 0.13114680349826813, 'timestamp': '2025-10-01 04:36:32.673702', 'step': 14198, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:32.729799', 'step': 14198, 'epoch': 2} {'type': 'loss', 'content': 0.11005011200904846, 'timestamp': '2025-10-01 04:36:32.731934', 'step': 14199, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:32.790709', 'step': 14199, 'epoch': 2} {'type': 'loss', 'content': 0.11442742496728897, 'timestamp': '2025-10-01 04:36:32.796630', 'step': 14200, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:32.859326', 'step': 14200, 'epoch': 2} {'type': 'loss', 'content': 0.1524917185306549, 'timestamp': '2025-10-01 04:36:32.861561', 'step': 14201, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:36:32.915330', 'step': 14201, 'epoch': 2} {'type': 'loss', 'content': 0.10335388034582138, 'timestamp': '2025-10-01 04:36:32.917544', 'step': 14202, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:32.970869', 'step': 14202, 'epoch': 2} {'type': 'loss', 'content': 0.12767073512077332, 'timestamp': '2025-10-01 04:36:32.972998', 'step': 14203, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:33.026660', 'step': 14203, 'epoch': 2} {'type': 'loss', 'content': 0.1096864640712738, 'timestamp': '2025-10-01 04:36:33.032507', 'step': 14204, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:33.084685', 'step': 14204, 'epoch': 2} {'type': 'loss', 'content': 0.10671009868383408, 'timestamp': '2025-10-01 04:36:33.086740', 'step': 14205, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:33.139648', 'step': 14205, 'epoch': 2} {'type': 'loss', 'content': 0.09813834726810455, 'timestamp': '2025-10-01 04:36:33.143035', 'step': 14206, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:33.197724', 'step': 14206, 'epoch': 2} {'type': 'loss', 'content': 0.15981954336166382, 'timestamp': '2025-10-01 04:36:33.199822', 'step': 14207, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:33.253579', 'step': 14207, 'epoch': 2} {'type': 'loss', 'content': 0.15078403055667877, 'timestamp': '2025-10-01 04:36:33.259273', 'step': 14208, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:33.323781', 'step': 14208, 'epoch': 2} {'type': 'loss', 'content': 0.06695520132780075, 'timestamp': '2025-10-01 04:36:33.325783', 'step': 14209, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:33.378212', 'step': 14209, 'epoch': 2} {'type': 'loss', 'content': 0.20273800194263458, 'timestamp': '2025-10-01 04:36:33.380372', 'step': 14210, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:33.433707', 'step': 14210, 'epoch': 2} {'type': 'loss', 'content': 0.07069755345582962, 'timestamp': '2025-10-01 04:36:33.435714', 'step': 14211, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:33.488729', 'step': 14211, 'epoch': 2} {'type': 'loss', 'content': 0.14309678971767426, 'timestamp': '2025-10-01 04:36:33.494422', 'step': 14212, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:33.547676', 'step': 14212, 'epoch': 2} {'type': 'loss', 'content': 0.14180034399032593, 'timestamp': '2025-10-01 04:36:33.549845', 'step': 14213, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:33.603426', 'step': 14213, 'epoch': 2} {'type': 'loss', 'content': 0.09334901720285416, 'timestamp': '2025-10-01 04:36:33.605567', 'step': 14214, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:33.658857', 'step': 14214, 'epoch': 2} {'type': 'loss', 'content': 0.12640708684921265, 'timestamp': '2025-10-01 04:36:33.661302', 'step': 14215, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:36:33.714487', 'step': 14215, 'epoch': 2} {'type': 'loss', 'content': 0.12571801245212555, 'timestamp': '2025-10-01 04:36:33.720439', 'step': 14216, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:33.773306', 'step': 14216, 'epoch': 2} {'type': 'loss', 'content': 0.10527732968330383, 'timestamp': '2025-10-01 04:36:33.775452', 'step': 14217, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:33.828824', 'step': 14217, 'epoch': 2} {'type': 'loss', 'content': 0.030768072232604027, 'timestamp': '2025-10-01 04:36:33.830930', 'step': 14218, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:33.884263', 'step': 14218, 'epoch': 2} {'type': 'loss', 'content': 0.10086540877819061, 'timestamp': '2025-10-01 04:36:33.886384', 'step': 14219, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:33.939409', 'step': 14219, 'epoch': 2} {'type': 'loss', 'content': 0.08279761672019958, 'timestamp': '2025-10-01 04:36:33.948725', 'step': 14220, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:34.001453', 'step': 14220, 'epoch': 2} {'type': 'loss', 'content': 0.15658670663833618, 'timestamp': '2025-10-01 04:36:34.003678', 'step': 14221, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:34.057303', 'step': 14221, 'epoch': 2} {'type': 'loss', 'content': 0.13874551653862, 'timestamp': '2025-10-01 04:36:34.059470', 'step': 14222, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:36:34.112941', 'step': 14222, 'epoch': 2} {'type': 'loss', 'content': 0.07720694690942764, 'timestamp': '2025-10-01 04:36:34.115132', 'step': 14223, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:34.168192', 'step': 14223, 'epoch': 2} {'type': 'loss', 'content': 0.0815802589058876, 'timestamp': '2025-10-01 04:36:34.173973', 'step': 14224, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:34.226503', 'step': 14224, 'epoch': 2} {'type': 'loss', 'content': 0.140005961060524, 'timestamp': '2025-10-01 04:36:34.228528', 'step': 14225, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:34.281180', 'step': 14225, 'epoch': 2} {'type': 'loss', 'content': 0.14653341472148895, 'timestamp': '2025-10-01 04:36:34.283304', 'step': 14226, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:34.336592', 'step': 14226, 'epoch': 2} {'type': 'loss', 'content': 0.12302792817354202, 'timestamp': '2025-10-01 04:36:34.338792', 'step': 14227, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:34.391752', 'step': 14227, 'epoch': 2} {'type': 'loss', 'content': 0.1783594936132431, 'timestamp': '2025-10-01 04:36:34.397413', 'step': 14228, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:34.449892', 'step': 14228, 'epoch': 2} {'type': 'loss', 'content': 0.038891322910785675, 'timestamp': '2025-10-01 04:36:34.452138', 'step': 14229, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:34.505368', 'step': 14229, 'epoch': 2} {'type': 'loss', 'content': 0.19603851437568665, 'timestamp': '2025-10-01 04:36:34.507582', 'step': 14230, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:36:34.561548', 'step': 14230, 'epoch': 2} {'type': 'loss', 'content': 0.08662831038236618, 'timestamp': '2025-10-01 04:36:34.563621', 'step': 14231, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:34.617124', 'step': 14231, 'epoch': 2} {'type': 'loss', 'content': 0.1620146632194519, 'timestamp': '2025-10-01 04:36:34.622833', 'step': 14232, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:36:34.675414', 'step': 14232, 'epoch': 2} {'type': 'loss', 'content': 0.08958910405635834, 'timestamp': '2025-10-01 04:36:34.677513', 'step': 14233, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:34.730296', 'step': 14233, 'epoch': 2} {'type': 'loss', 'content': 0.14246432483196259, 'timestamp': '2025-10-01 04:36:34.732356', 'step': 14234, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:34.786347', 'step': 14234, 'epoch': 2} {'type': 'loss', 'content': 0.03656626120209694, 'timestamp': '2025-10-01 04:36:34.788927', 'step': 14235, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:34.842595', 'step': 14235, 'epoch': 2} {'type': 'loss', 'content': 0.06872629374265671, 'timestamp': '2025-10-01 04:36:34.848252', 'step': 14236, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:34.901121', 'step': 14236, 'epoch': 2} {'type': 'loss', 'content': 0.07142678648233414, 'timestamp': '2025-10-01 04:36:34.903309', 'step': 14237, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:34.956148', 'step': 14237, 'epoch': 2} {'type': 'loss', 'content': 0.15094172954559326, 'timestamp': '2025-10-01 04:36:34.958236', 'step': 14238, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:35.011271', 'step': 14238, 'epoch': 2} {'type': 'loss', 'content': 0.10343535989522934, 'timestamp': '2025-10-01 04:36:35.013614', 'step': 14239, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:35.066933', 'step': 14239, 'epoch': 2} {'type': 'loss', 'content': 0.13991697132587433, 'timestamp': '2025-10-01 04:36:35.072733', 'step': 14240, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:35.125442', 'step': 14240, 'epoch': 2} {'type': 'loss', 'content': 0.05257687345147133, 'timestamp': '2025-10-01 04:36:35.127834', 'step': 14241, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:35.182179', 'step': 14241, 'epoch': 2} {'type': 'loss', 'content': 0.06199827790260315, 'timestamp': '2025-10-01 04:36:35.184402', 'step': 14242, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:35.237447', 'step': 14242, 'epoch': 2} {'type': 'loss', 'content': 0.20421849191188812, 'timestamp': '2025-10-01 04:36:35.239634', 'step': 14243, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:35.293786', 'step': 14243, 'epoch': 2} {'type': 'loss', 'content': 0.1273273080587387, 'timestamp': '2025-10-01 04:36:35.299614', 'step': 14244, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:35.353138', 'step': 14244, 'epoch': 2} {'type': 'loss', 'content': 0.14996400475502014, 'timestamp': '2025-10-01 04:36:35.355484', 'step': 14245, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:35.408253', 'step': 14245, 'epoch': 2} {'type': 'loss', 'content': 0.07378970086574554, 'timestamp': '2025-10-01 04:36:35.410333', 'step': 14246, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:35.463754', 'step': 14246, 'epoch': 2} {'type': 'loss', 'content': 0.20772968232631683, 'timestamp': '2025-10-01 04:36:35.466183', 'step': 14247, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:35.520742', 'step': 14247, 'epoch': 2} {'type': 'loss', 'content': 0.08788717538118362, 'timestamp': '2025-10-01 04:36:35.526531', 'step': 14248, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:35.583265', 'step': 14248, 'epoch': 2} {'type': 'loss', 'content': 0.1277058720588684, 'timestamp': '2025-10-01 04:36:35.585428', 'step': 14249, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:35.639083', 'step': 14249, 'epoch': 2} {'type': 'loss', 'content': 0.1358795017004013, 'timestamp': '2025-10-01 04:36:35.641205', 'step': 14250, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:35.694577', 'step': 14250, 'epoch': 2} {'type': 'loss', 'content': 0.11324355751276016, 'timestamp': '2025-10-01 04:36:35.696669', 'step': 14251, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:35.749976', 'step': 14251, 'epoch': 2} {'type': 'loss', 'content': 0.09151805192232132, 'timestamp': '2025-10-01 04:36:35.755770', 'step': 14252, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:36:35.809569', 'step': 14252, 'epoch': 2} {'type': 'loss', 'content': 0.0920318141579628, 'timestamp': '2025-10-01 04:36:35.811640', 'step': 14253, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:36:35.864598', 'step': 14253, 'epoch': 2} {'type': 'loss', 'content': 0.1194361075758934, 'timestamp': '2025-10-01 04:36:35.867245', 'step': 14254, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:35.921982', 'step': 14254, 'epoch': 2} {'type': 'loss', 'content': 0.06415946781635284, 'timestamp': '2025-10-01 04:36:35.924775', 'step': 14255, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:35.986222', 'step': 14255, 'epoch': 2} {'type': 'loss', 'content': 0.10892579704523087, 'timestamp': '2025-10-01 04:36:35.991971', 'step': 14256, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:36.045033', 'step': 14256, 'epoch': 2} {'type': 'loss', 'content': 0.11132676899433136, 'timestamp': '2025-10-01 04:36:36.046908', 'step': 14257, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:36.099869', 'step': 14257, 'epoch': 2} {'type': 'loss', 'content': 0.16095416247844696, 'timestamp': '2025-10-01 04:36:36.102134', 'step': 14258, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:36.167742', 'step': 14258, 'epoch': 2} {'type': 'loss', 'content': 0.10173163563013077, 'timestamp': '2025-10-01 04:36:36.170050', 'step': 14259, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:36.236972', 'step': 14259, 'epoch': 2} {'type': 'loss', 'content': 0.2266683280467987, 'timestamp': '2025-10-01 04:36:36.242833', 'step': 14260, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:36.296173', 'step': 14260, 'epoch': 2} {'type': 'loss', 'content': 0.12453033030033112, 'timestamp': '2025-10-01 04:36:36.298224', 'step': 14261, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:36.352388', 'step': 14261, 'epoch': 2} {'type': 'loss', 'content': 0.2080872505903244, 'timestamp': '2025-10-01 04:36:36.354881', 'step': 14262, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:36.408476', 'step': 14262, 'epoch': 2} {'type': 'loss', 'content': 0.16112814843654633, 'timestamp': '2025-10-01 04:36:36.410674', 'step': 14263, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:36.463799', 'step': 14263, 'epoch': 2} {'type': 'loss', 'content': 0.07566890865564346, 'timestamp': '2025-10-01 04:36:36.469672', 'step': 14264, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:36.522462', 'step': 14264, 'epoch': 2} {'type': 'loss', 'content': 0.1741991639137268, 'timestamp': '2025-10-01 04:36:36.524821', 'step': 14265, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:36.578099', 'step': 14265, 'epoch': 2} {'type': 'loss', 'content': 0.09963154792785645, 'timestamp': '2025-10-01 04:36:36.584542', 'step': 14266, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:36.640064', 'step': 14266, 'epoch': 2} {'type': 'loss', 'content': 0.03591572865843773, 'timestamp': '2025-10-01 04:36:36.642105', 'step': 14267, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:36.695131', 'step': 14267, 'epoch': 2} {'type': 'loss', 'content': 0.11395703256130219, 'timestamp': '2025-10-01 04:36:36.700858', 'step': 14268, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:36.753302', 'step': 14268, 'epoch': 2} {'type': 'loss', 'content': 0.11564125120639801, 'timestamp': '2025-10-01 04:36:36.755352', 'step': 14269, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:36.808545', 'step': 14269, 'epoch': 2} {'type': 'loss', 'content': 0.152526393532753, 'timestamp': '2025-10-01 04:36:36.811551', 'step': 14270, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:36.865268', 'step': 14270, 'epoch': 2} {'type': 'loss', 'content': 0.09668970108032227, 'timestamp': '2025-10-01 04:36:36.867423', 'step': 14271, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:36.921029', 'step': 14271, 'epoch': 2} {'type': 'loss', 'content': 0.1464328020811081, 'timestamp': '2025-10-01 04:36:36.926930', 'step': 14272, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:36.984772', 'step': 14272, 'epoch': 2} {'type': 'loss', 'content': 0.06144103780388832, 'timestamp': '2025-10-01 04:36:36.986952', 'step': 14273, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:37.040176', 'step': 14273, 'epoch': 2} {'type': 'loss', 'content': 0.07631931453943253, 'timestamp': '2025-10-01 04:36:37.042400', 'step': 14274, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:37.095961', 'step': 14274, 'epoch': 2} {'type': 'loss', 'content': 0.1012861356139183, 'timestamp': '2025-10-01 04:36:37.098087', 'step': 14275, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:37.153654', 'step': 14275, 'epoch': 2} {'type': 'loss', 'content': 0.08371452987194061, 'timestamp': '2025-10-01 04:36:37.159705', 'step': 14276, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:37.212405', 'step': 14276, 'epoch': 2} {'type': 'loss', 'content': 0.10486415028572083, 'timestamp': '2025-10-01 04:36:37.214627', 'step': 14277, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:37.269143', 'step': 14277, 'epoch': 2} {'type': 'loss', 'content': 0.12064468115568161, 'timestamp': '2025-10-01 04:36:37.271481', 'step': 14278, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:37.325187', 'step': 14278, 'epoch': 2} {'type': 'loss', 'content': 0.17950071394443512, 'timestamp': '2025-10-01 04:36:37.327914', 'step': 14279, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:37.380971', 'step': 14279, 'epoch': 2} {'type': 'loss', 'content': 0.07700275629758835, 'timestamp': '2025-10-01 04:36:37.386936', 'step': 14280, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:37.439923', 'step': 14280, 'epoch': 2} {'type': 'loss', 'content': 0.11802585422992706, 'timestamp': '2025-10-01 04:36:37.442803', 'step': 14281, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:37.495887', 'step': 14281, 'epoch': 2} {'type': 'loss', 'content': 0.07097287476062775, 'timestamp': '2025-10-01 04:36:37.498365', 'step': 14282, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:36:37.551809', 'step': 14282, 'epoch': 2} {'type': 'loss', 'content': 0.09702115505933762, 'timestamp': '2025-10-01 04:36:37.554147', 'step': 14283, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:36:37.607673', 'step': 14283, 'epoch': 2} {'type': 'loss', 'content': 0.1352836787700653, 'timestamp': '2025-10-01 04:36:37.613534', 'step': 14284, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:37.668243', 'step': 14284, 'epoch': 2} {'type': 'loss', 'content': 0.10426806658506393, 'timestamp': '2025-10-01 04:36:37.670383', 'step': 14285, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:37.723707', 'step': 14285, 'epoch': 2} {'type': 'loss', 'content': 0.18833783268928528, 'timestamp': '2025-10-01 04:36:37.733501', 'step': 14286, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:37.797540', 'step': 14286, 'epoch': 2} {'type': 'loss', 'content': 0.17642943561077118, 'timestamp': '2025-10-01 04:36:37.803280', 'step': 14287, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:36:37.871714', 'step': 14287, 'epoch': 2} {'type': 'loss', 'content': 0.07138556241989136, 'timestamp': '2025-10-01 04:36:37.877537', 'step': 14288, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:37.931185', 'step': 14288, 'epoch': 2} {'type': 'loss', 'content': 0.11510311812162399, 'timestamp': '2025-10-01 04:36:37.933606', 'step': 14289, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:36:37.987013', 'step': 14289, 'epoch': 2} {'type': 'loss', 'content': 0.08628398925065994, 'timestamp': '2025-10-01 04:36:37.989111', 'step': 14290, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:38.042479', 'step': 14290, 'epoch': 2} {'type': 'loss', 'content': 0.14753057062625885, 'timestamp': '2025-10-01 04:36:38.044540', 'step': 14291, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:38.097877', 'step': 14291, 'epoch': 2} {'type': 'loss', 'content': 0.06903528422117233, 'timestamp': '2025-10-01 04:36:38.103550', 'step': 14292, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:38.155766', 'step': 14292, 'epoch': 2} {'type': 'loss', 'content': 0.19099462032318115, 'timestamp': '2025-10-01 04:36:38.158025', 'step': 14293, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:36:38.215963', 'step': 14293, 'epoch': 2} {'type': 'loss', 'content': 0.11915367841720581, 'timestamp': '2025-10-01 04:36:38.218088', 'step': 14294, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:38.271816', 'step': 14294, 'epoch': 2} {'type': 'loss', 'content': 0.07711675763130188, 'timestamp': '2025-10-01 04:36:38.274106', 'step': 14295, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:38.327653', 'step': 14295, 'epoch': 2} {'type': 'loss', 'content': 0.10751467198133469, 'timestamp': '2025-10-01 04:36:38.350479', 'step': 14296, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:36:38.404263', 'step': 14296, 'epoch': 2} {'type': 'loss', 'content': 0.1371840089559555, 'timestamp': '2025-10-01 04:36:38.406498', 'step': 14297, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:38.471243', 'step': 14297, 'epoch': 2} {'type': 'loss', 'content': 0.10525926947593689, 'timestamp': '2025-10-01 04:36:38.473852', 'step': 14298, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:36:38.527850', 'step': 14298, 'epoch': 2} {'type': 'loss', 'content': 0.14266787469387054, 'timestamp': '2025-10-01 04:36:38.530054', 'step': 14299, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:38.583445', 'step': 14299, 'epoch': 2} {'type': 'loss', 'content': 0.08533386141061783, 'timestamp': '2025-10-01 04:36:38.589143', 'step': 14300, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:36:38.642268', 'step': 14300, 'epoch': 2} {'type': 'loss', 'content': 0.09654667228460312, 'timestamp': '2025-10-01 04:36:38.644489', 'step': 14301, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:38.698181', 'step': 14301, 'epoch': 2} {'type': 'loss', 'content': 0.08236909657716751, 'timestamp': '2025-10-01 04:36:38.700800', 'step': 14302, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:38.753911', 'step': 14302, 'epoch': 2} {'type': 'loss', 'content': 0.09400177001953125, 'timestamp': '2025-10-01 04:36:38.756013', 'step': 14303, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:38.813465', 'step': 14303, 'epoch': 2} {'type': 'loss', 'content': 0.0964910164475441, 'timestamp': '2025-10-01 04:36:38.819296', 'step': 14304, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:38.872092', 'step': 14304, 'epoch': 2} {'type': 'loss', 'content': 0.12771528959274292, 'timestamp': '2025-10-01 04:36:38.874195', 'step': 14305, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:38.927870', 'step': 14305, 'epoch': 2} {'type': 'loss', 'content': 0.07559899240732193, 'timestamp': '2025-10-01 04:36:38.933478', 'step': 14306, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:38.987573', 'step': 14306, 'epoch': 2} {'type': 'loss', 'content': 0.07540249824523926, 'timestamp': '2025-10-01 04:36:38.989847', 'step': 14307, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:39.054293', 'step': 14307, 'epoch': 2} {'type': 'loss', 'content': 0.11477258056402206, 'timestamp': '2025-10-01 04:36:39.060399', 'step': 14308, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:39.114009', 'step': 14308, 'epoch': 2} {'type': 'loss', 'content': 0.1228124275803566, 'timestamp': '2025-10-01 04:36:39.116236', 'step': 14309, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:39.178018', 'step': 14309, 'epoch': 2} {'type': 'loss', 'content': 0.08975841850042343, 'timestamp': '2025-10-01 04:36:39.180424', 'step': 14310, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:39.234667', 'step': 14310, 'epoch': 2} {'type': 'loss', 'content': 0.07325366139411926, 'timestamp': '2025-10-01 04:36:39.236819', 'step': 14311, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:39.299205', 'step': 14311, 'epoch': 2} {'type': 'loss', 'content': 0.05781988427042961, 'timestamp': '2025-10-01 04:36:39.305243', 'step': 14312, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:39.359242', 'step': 14312, 'epoch': 2} {'type': 'loss', 'content': 0.0883532389998436, 'timestamp': '2025-10-01 04:36:39.361454', 'step': 14313, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:36:39.425234', 'step': 14313, 'epoch': 2} {'type': 'loss', 'content': 0.13224640488624573, 'timestamp': '2025-10-01 04:36:39.428054', 'step': 14314, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:39.483295', 'step': 14314, 'epoch': 2} {'type': 'loss', 'content': 0.14953114092350006, 'timestamp': '2025-10-01 04:36:39.486002', 'step': 14315, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:39.540793', 'step': 14315, 'epoch': 2} {'type': 'loss', 'content': 0.13025560975074768, 'timestamp': '2025-10-01 04:36:39.546966', 'step': 14316, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:39.601238', 'step': 14316, 'epoch': 2} {'type': 'loss', 'content': 0.04736175015568733, 'timestamp': '2025-10-01 04:36:39.603921', 'step': 14317, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:39.658469', 'step': 14317, 'epoch': 2} {'type': 'loss', 'content': 0.08498287200927734, 'timestamp': '2025-10-01 04:36:39.661035', 'step': 14318, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:36:39.729707', 'step': 14318, 'epoch': 2} {'type': 'loss', 'content': 0.11520817130804062, 'timestamp': '2025-10-01 04:36:39.732176', 'step': 14319, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:39.787573', 'step': 14319, 'epoch': 2} {'type': 'loss', 'content': 0.084335558116436, 'timestamp': '2025-10-01 04:36:39.795644', 'step': 14320, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:39.849892', 'step': 14320, 'epoch': 2} {'type': 'loss', 'content': 0.10523995757102966, 'timestamp': '2025-10-01 04:36:39.852327', 'step': 14321, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:39.906501', 'step': 14321, 'epoch': 2} {'type': 'loss', 'content': 0.10097865015268326, 'timestamp': '2025-10-01 04:36:39.908996', 'step': 14322, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:39.964666', 'step': 14322, 'epoch': 2} {'type': 'loss', 'content': 0.21460579335689545, 'timestamp': '2025-10-01 04:36:39.967038', 'step': 14323, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:40.021769', 'step': 14323, 'epoch': 2} {'type': 'loss', 'content': 0.16219528019428253, 'timestamp': '2025-10-01 04:36:40.027445', 'step': 14324, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:40.080719', 'step': 14324, 'epoch': 2} {'type': 'loss', 'content': 0.059708233922719955, 'timestamp': '2025-10-01 04:36:40.083606', 'step': 14325, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:40.144084', 'step': 14325, 'epoch': 2} {'type': 'loss', 'content': 0.07044728845357895, 'timestamp': '2025-10-01 04:36:40.146974', 'step': 14326, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:40.201081', 'step': 14326, 'epoch': 2} {'type': 'loss', 'content': 0.11982958763837814, 'timestamp': '2025-10-01 04:36:40.204241', 'step': 14327, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:40.259441', 'step': 14327, 'epoch': 2} {'type': 'loss', 'content': 0.08281730860471725, 'timestamp': '2025-10-01 04:36:40.265426', 'step': 14328, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:40.320472', 'step': 14328, 'epoch': 2} {'type': 'loss', 'content': 0.11879251152276993, 'timestamp': '2025-10-01 04:36:40.334131', 'step': 14329, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:40.388383', 'step': 14329, 'epoch': 2} {'type': 'loss', 'content': 0.11023630946874619, 'timestamp': '2025-10-01 04:36:40.391775', 'step': 14330, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:40.446858', 'step': 14330, 'epoch': 2} {'type': 'loss', 'content': 0.14731818437576294, 'timestamp': '2025-10-01 04:36:40.449172', 'step': 14331, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:40.512084', 'step': 14331, 'epoch': 2} {'type': 'loss', 'content': 0.1444023996591568, 'timestamp': '2025-10-01 04:36:40.518043', 'step': 14332, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:40.570637', 'step': 14332, 'epoch': 2} {'type': 'loss', 'content': 0.14518114924430847, 'timestamp': '2025-10-01 04:36:40.573624', 'step': 14333, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:36:40.629896', 'step': 14333, 'epoch': 2} {'type': 'loss', 'content': 0.1360418051481247, 'timestamp': '2025-10-01 04:36:40.632148', 'step': 14334, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:40.690887', 'step': 14334, 'epoch': 2} {'type': 'loss', 'content': 0.1746048927307129, 'timestamp': '2025-10-01 04:36:40.695771', 'step': 14335, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:36:40.748803', 'step': 14335, 'epoch': 2} {'type': 'loss', 'content': 0.1151392012834549, 'timestamp': '2025-10-01 04:36:40.754519', 'step': 14336, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:40.807223', 'step': 14336, 'epoch': 2} {'type': 'loss', 'content': 0.14105726778507233, 'timestamp': '2025-10-01 04:36:40.809403', 'step': 14337, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:40.862352', 'step': 14337, 'epoch': 2} {'type': 'loss', 'content': 0.13329438865184784, 'timestamp': '2025-10-01 04:36:40.864571', 'step': 14338, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:40.918850', 'step': 14338, 'epoch': 2} {'type': 'loss', 'content': 0.105117067694664, 'timestamp': '2025-10-01 04:36:40.921168', 'step': 14339, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:40.974891', 'step': 14339, 'epoch': 2} {'type': 'loss', 'content': 0.06013983115553856, 'timestamp': '2025-10-01 04:36:40.980764', 'step': 14340, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:36:41.038386', 'step': 14340, 'epoch': 2} {'type': 'loss', 'content': 0.07101651281118393, 'timestamp': '2025-10-01 04:36:41.044021', 'step': 14341, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:41.109012', 'step': 14341, 'epoch': 2} {'type': 'loss', 'content': 0.23468072712421417, 'timestamp': '2025-10-01 04:36:41.111381', 'step': 14342, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:41.165233', 'step': 14342, 'epoch': 2} {'type': 'loss', 'content': 0.0785684734582901, 'timestamp': '2025-10-01 04:36:41.169269', 'step': 14343, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:36:41.222801', 'step': 14343, 'epoch': 2} {'type': 'loss', 'content': 0.1484784483909607, 'timestamp': '2025-10-01 04:36:41.228794', 'step': 14344, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:41.291943', 'step': 14344, 'epoch': 2} {'type': 'loss', 'content': 0.1018570065498352, 'timestamp': '2025-10-01 04:36:41.294288', 'step': 14345, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:41.348224', 'step': 14345, 'epoch': 2} {'type': 'loss', 'content': 0.09133869409561157, 'timestamp': '2025-10-01 04:36:41.350373', 'step': 14346, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:41.408657', 'step': 14346, 'epoch': 2} {'type': 'loss', 'content': 0.07490450888872147, 'timestamp': '2025-10-01 04:36:41.410716', 'step': 14347, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:36:41.464472', 'step': 14347, 'epoch': 2} {'type': 'loss', 'content': 0.08011926710605621, 'timestamp': '2025-10-01 04:36:41.470273', 'step': 14348, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:41.523559', 'step': 14348, 'epoch': 2} {'type': 'loss', 'content': 0.08643961697816849, 'timestamp': '2025-10-01 04:36:41.525750', 'step': 14349, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:41.578872', 'step': 14349, 'epoch': 2} {'type': 'loss', 'content': 0.07464431971311569, 'timestamp': '2025-10-01 04:36:41.581010', 'step': 14350, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:41.635677', 'step': 14350, 'epoch': 2} {'type': 'loss', 'content': 0.14251162111759186, 'timestamp': '2025-10-01 04:36:41.637964', 'step': 14351, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:41.692586', 'step': 14351, 'epoch': 2} {'type': 'loss', 'content': 0.13256646692752838, 'timestamp': '2025-10-01 04:36:41.698351', 'step': 14352, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:41.750857', 'step': 14352, 'epoch': 2} {'type': 'loss', 'content': 0.17819225788116455, 'timestamp': '2025-10-01 04:36:41.752954', 'step': 14353, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:41.806370', 'step': 14353, 'epoch': 2} {'type': 'loss', 'content': 0.07963105291128159, 'timestamp': '2025-10-01 04:36:41.808462', 'step': 14354, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:41.862118', 'step': 14354, 'epoch': 2} {'type': 'loss', 'content': 0.12944817543029785, 'timestamp': '2025-10-01 04:36:41.864213', 'step': 14355, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:41.924985', 'step': 14355, 'epoch': 2} {'type': 'loss', 'content': 0.13745510578155518, 'timestamp': '2025-10-01 04:36:41.930623', 'step': 14356, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:41.984041', 'step': 14356, 'epoch': 2} {'type': 'loss', 'content': 0.09058883786201477, 'timestamp': '2025-10-01 04:36:41.986773', 'step': 14357, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:42.040252', 'step': 14357, 'epoch': 2} {'type': 'loss', 'content': 0.10548397153615952, 'timestamp': '2025-10-01 04:36:42.042655', 'step': 14358, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:42.096304', 'step': 14358, 'epoch': 2} {'type': 'loss', 'content': 0.13063457608222961, 'timestamp': '2025-10-01 04:36:42.100235', 'step': 14359, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:36:42.153725', 'step': 14359, 'epoch': 2} {'type': 'loss', 'content': 0.0910773053765297, 'timestamp': '2025-10-01 04:36:42.159505', 'step': 14360, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:36:42.212724', 'step': 14360, 'epoch': 2} {'type': 'loss', 'content': 0.08871060609817505, 'timestamp': '2025-10-01 04:36:42.214781', 'step': 14361, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:42.268802', 'step': 14361, 'epoch': 2} {'type': 'loss', 'content': 0.20873427391052246, 'timestamp': '2025-10-01 04:36:42.271106', 'step': 14362, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:36:42.325091', 'step': 14362, 'epoch': 2} {'type': 'loss', 'content': 0.18927161395549774, 'timestamp': '2025-10-01 04:36:42.327278', 'step': 14363, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:42.382837', 'step': 14363, 'epoch': 2} {'type': 'loss', 'content': 0.10184469074010849, 'timestamp': '2025-10-01 04:36:42.389088', 'step': 14364, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:42.444297', 'step': 14364, 'epoch': 2} {'type': 'loss', 'content': 0.1605817824602127, 'timestamp': '2025-10-01 04:36:42.447297', 'step': 14365, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:42.500949', 'step': 14365, 'epoch': 2} {'type': 'loss', 'content': 0.20672009885311127, 'timestamp': '2025-10-01 04:36:42.503321', 'step': 14366, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:36:42.557981', 'step': 14366, 'epoch': 2} {'type': 'loss', 'content': 0.11031104624271393, 'timestamp': '2025-10-01 04:36:42.562935', 'step': 14367, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:42.617276', 'step': 14367, 'epoch': 2} {'type': 'loss', 'content': 0.0692765936255455, 'timestamp': '2025-10-01 04:36:42.623743', 'step': 14368, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:42.677954', 'step': 14368, 'epoch': 2} {'type': 'loss', 'content': 0.12126859277486801, 'timestamp': '2025-10-01 04:36:42.680150', 'step': 14369, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:42.736291', 'step': 14369, 'epoch': 2} {'type': 'loss', 'content': 0.1914258748292923, 'timestamp': '2025-10-01 04:36:42.738652', 'step': 14370, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:42.792465', 'step': 14370, 'epoch': 2} {'type': 'loss', 'content': 0.09578705579042435, 'timestamp': '2025-10-01 04:36:42.794998', 'step': 14371, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:42.848857', 'step': 14371, 'epoch': 2} {'type': 'loss', 'content': 0.08005258440971375, 'timestamp': '2025-10-01 04:36:42.854739', 'step': 14372, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:42.907924', 'step': 14372, 'epoch': 2} {'type': 'loss', 'content': 0.09644664824008942, 'timestamp': '2025-10-01 04:36:42.910271', 'step': 14373, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:42.964154', 'step': 14373, 'epoch': 2} {'type': 'loss', 'content': 0.10859145224094391, 'timestamp': '2025-10-01 04:36:42.967018', 'step': 14374, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:43.020994', 'step': 14374, 'epoch': 2} {'type': 'loss', 'content': 0.17465417087078094, 'timestamp': '2025-10-01 04:36:43.023199', 'step': 14375, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:43.077151', 'step': 14375, 'epoch': 2} {'type': 'loss', 'content': 0.10375455766916275, 'timestamp': '2025-10-01 04:36:43.082958', 'step': 14376, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:36:43.135968', 'step': 14376, 'epoch': 2} {'type': 'loss', 'content': 0.0709785744547844, 'timestamp': '2025-10-01 04:36:43.139623', 'step': 14377, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:43.195916', 'step': 14377, 'epoch': 2} {'type': 'loss', 'content': 0.04420420899987221, 'timestamp': '2025-10-01 04:36:43.198021', 'step': 14378, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:36:43.251361', 'step': 14378, 'epoch': 2} {'type': 'loss', 'content': 0.15220539271831512, 'timestamp': '2025-10-01 04:36:43.253501', 'step': 14379, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:43.313160', 'step': 14379, 'epoch': 2} {'type': 'loss', 'content': 0.0628931000828743, 'timestamp': '2025-10-01 04:36:43.318895', 'step': 14380, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:43.371883', 'step': 14380, 'epoch': 2} {'type': 'loss', 'content': 0.05381862819194794, 'timestamp': '2025-10-01 04:36:43.373991', 'step': 14381, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:43.428723', 'step': 14381, 'epoch': 2} {'type': 'loss', 'content': 0.10566992312669754, 'timestamp': '2025-10-01 04:36:43.431833', 'step': 14382, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:43.485411', 'step': 14382, 'epoch': 2} {'type': 'loss', 'content': 0.11997920274734497, 'timestamp': '2025-10-01 04:36:43.487697', 'step': 14383, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:43.547873', 'step': 14383, 'epoch': 2} {'type': 'loss', 'content': 0.09022574126720428, 'timestamp': '2025-10-01 04:36:43.553769', 'step': 14384, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:43.609150', 'step': 14384, 'epoch': 2} {'type': 'loss', 'content': 0.07641993463039398, 'timestamp': '2025-10-01 04:36:43.611402', 'step': 14385, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:36:43.669330', 'step': 14385, 'epoch': 2} {'type': 'loss', 'content': 0.12060494720935822, 'timestamp': '2025-10-01 04:36:43.671490', 'step': 14386, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:43.725434', 'step': 14386, 'epoch': 2} {'type': 'loss', 'content': 0.10920387506484985, 'timestamp': '2025-10-01 04:36:43.730869', 'step': 14387, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:43.784550', 'step': 14387, 'epoch': 2} {'type': 'loss', 'content': 0.11885412782430649, 'timestamp': '2025-10-01 04:36:43.790611', 'step': 14388, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:43.847724', 'step': 14388, 'epoch': 2} {'type': 'loss', 'content': 0.1658114790916443, 'timestamp': '2025-10-01 04:36:43.850776', 'step': 14389, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:43.904910', 'step': 14389, 'epoch': 2} {'type': 'loss', 'content': 0.17379215359687805, 'timestamp': '2025-10-01 04:36:43.907362', 'step': 14390, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:43.961715', 'step': 14390, 'epoch': 2} {'type': 'loss', 'content': 0.060784269124269485, 'timestamp': '2025-10-01 04:36:43.963892', 'step': 14391, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:44.018592', 'step': 14391, 'epoch': 2} {'type': 'loss', 'content': 0.05322035774588585, 'timestamp': '2025-10-01 04:36:44.025174', 'step': 14392, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:36:44.082662', 'step': 14392, 'epoch': 2} {'type': 'loss', 'content': 0.04423578828573227, 'timestamp': '2025-10-01 04:36:44.084904', 'step': 14393, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:44.143131', 'step': 14393, 'epoch': 2} {'type': 'loss', 'content': 0.09241209924221039, 'timestamp': '2025-10-01 04:36:44.145723', 'step': 14394, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:44.202734', 'step': 14394, 'epoch': 2} {'type': 'loss', 'content': 0.06795283406972885, 'timestamp': '2025-10-01 04:36:44.205066', 'step': 14395, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:44.261945', 'step': 14395, 'epoch': 2} {'type': 'loss', 'content': 0.09792159497737885, 'timestamp': '2025-10-01 04:36:44.268558', 'step': 14396, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:44.324916', 'step': 14396, 'epoch': 2} {'type': 'loss', 'content': 0.0942339301109314, 'timestamp': '2025-10-01 04:36:44.327587', 'step': 14397, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:44.392130', 'step': 14397, 'epoch': 2} {'type': 'loss', 'content': 0.07175706326961517, 'timestamp': '2025-10-01 04:36:44.394450', 'step': 14398, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:36:44.451486', 'step': 14398, 'epoch': 2} {'type': 'loss', 'content': 0.10502395033836365, 'timestamp': '2025-10-01 04:36:44.453670', 'step': 14399, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:44.509816', 'step': 14399, 'epoch': 2} {'type': 'loss', 'content': 0.13303671777248383, 'timestamp': '2025-10-01 04:36:44.516998', 'step': 14400, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:44.573741', 'step': 14400, 'epoch': 2} {'type': 'loss', 'content': 0.15543939173221588, 'timestamp': '2025-10-01 04:36:44.576363', 'step': 14401, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:44.632743', 'step': 14401, 'epoch': 2} {'type': 'loss', 'content': 0.10319459438323975, 'timestamp': '2025-10-01 04:36:44.635505', 'step': 14402, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:44.692954', 'step': 14402, 'epoch': 2} {'type': 'loss', 'content': 0.13794931769371033, 'timestamp': '2025-10-01 04:36:44.695171', 'step': 14403, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:44.750505', 'step': 14403, 'epoch': 2} {'type': 'loss', 'content': 0.07570874691009521, 'timestamp': '2025-10-01 04:36:44.756775', 'step': 14404, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:44.809978', 'step': 14404, 'epoch': 2} {'type': 'loss', 'content': 0.11275368928909302, 'timestamp': '2025-10-01 04:36:44.812086', 'step': 14405, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:44.871051', 'step': 14405, 'epoch': 2} {'type': 'loss', 'content': 0.1056671068072319, 'timestamp': '2025-10-01 04:36:44.873233', 'step': 14406, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:44.930124', 'step': 14406, 'epoch': 2} {'type': 'loss', 'content': 0.07638908177614212, 'timestamp': '2025-10-01 04:36:44.934943', 'step': 14407, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:36:44.989464', 'step': 14407, 'epoch': 2} {'type': 'loss', 'content': 0.08530036360025406, 'timestamp': '2025-10-01 04:36:44.995368', 'step': 14408, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:45.048495', 'step': 14408, 'epoch': 2} {'type': 'loss', 'content': 0.11842093616724014, 'timestamp': '2025-10-01 04:36:45.051146', 'step': 14409, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:36:45.104342', 'step': 14409, 'epoch': 2} {'type': 'loss', 'content': 0.1967180371284485, 'timestamp': '2025-10-01 04:36:45.106692', 'step': 14410, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:45.160301', 'step': 14410, 'epoch': 2} {'type': 'loss', 'content': 0.09954909235239029, 'timestamp': '2025-10-01 04:36:45.162534', 'step': 14411, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:45.224138', 'step': 14411, 'epoch': 2} {'type': 'loss', 'content': 0.0744853988289833, 'timestamp': '2025-10-01 04:36:45.230112', 'step': 14412, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:45.283759', 'step': 14412, 'epoch': 2} {'type': 'loss', 'content': 0.14621277153491974, 'timestamp': '2025-10-01 04:36:45.288208', 'step': 14413, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:45.342254', 'step': 14413, 'epoch': 2} {'type': 'loss', 'content': 0.08210142701864243, 'timestamp': '2025-10-01 04:36:45.344531', 'step': 14414, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:36:45.398159', 'step': 14414, 'epoch': 2} {'type': 'loss', 'content': 0.20243312418460846, 'timestamp': '2025-10-01 04:36:45.406163', 'step': 14415, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:45.459429', 'step': 14415, 'epoch': 2} {'type': 'loss', 'content': 0.08341164141893387, 'timestamp': '2025-10-01 04:36:45.465634', 'step': 14416, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:45.519088', 'step': 14416, 'epoch': 2} {'type': 'loss', 'content': 0.26251649856567383, 'timestamp': '2025-10-01 04:36:45.524716', 'step': 14417, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:45.578540', 'step': 14417, 'epoch': 2} {'type': 'loss', 'content': 0.0728636309504509, 'timestamp': '2025-10-01 04:36:45.581174', 'step': 14418, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:45.635505', 'step': 14418, 'epoch': 2} {'type': 'loss', 'content': 0.17099416255950928, 'timestamp': '2025-10-01 04:36:45.637836', 'step': 14419, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:36:45.694209', 'step': 14419, 'epoch': 2} {'type': 'loss', 'content': 0.13147619366645813, 'timestamp': '2025-10-01 04:36:45.706804', 'step': 14420, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:45.772767', 'step': 14420, 'epoch': 2} {'type': 'loss', 'content': 0.15570895373821259, 'timestamp': '2025-10-01 04:36:45.777528', 'step': 14421, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:45.831404', 'step': 14421, 'epoch': 2} {'type': 'loss', 'content': 0.12625224888324738, 'timestamp': '2025-10-01 04:36:45.836783', 'step': 14422, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:45.890750', 'step': 14422, 'epoch': 2} {'type': 'loss', 'content': 0.08170492947101593, 'timestamp': '2025-10-01 04:36:45.893091', 'step': 14423, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:45.959404', 'step': 14423, 'epoch': 2} {'type': 'loss', 'content': 0.10235806554555893, 'timestamp': '2025-10-01 04:36:45.965214', 'step': 14424, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:46.025204', 'step': 14424, 'epoch': 2} {'type': 'loss', 'content': 0.10835640877485275, 'timestamp': '2025-10-01 04:36:46.027601', 'step': 14425, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:46.080793', 'step': 14425, 'epoch': 2} {'type': 'loss', 'content': 0.05090579017996788, 'timestamp': '2025-10-01 04:36:46.082948', 'step': 14426, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:46.136605', 'step': 14426, 'epoch': 2} {'type': 'loss', 'content': 0.10469576716423035, 'timestamp': '2025-10-01 04:36:46.138818', 'step': 14427, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:46.193189', 'step': 14427, 'epoch': 2} {'type': 'loss', 'content': 0.11549503356218338, 'timestamp': '2025-10-01 04:36:46.199019', 'step': 14428, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:46.261313', 'step': 14428, 'epoch': 2} {'type': 'loss', 'content': 0.0257786326110363, 'timestamp': '2025-10-01 04:36:46.263432', 'step': 14429, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:46.316933', 'step': 14429, 'epoch': 2} {'type': 'loss', 'content': 0.10954301059246063, 'timestamp': '2025-10-01 04:36:46.319214', 'step': 14430, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:46.377670', 'step': 14430, 'epoch': 2} {'type': 'loss', 'content': 0.11894958466291428, 'timestamp': '2025-10-01 04:36:46.380050', 'step': 14431, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:46.445136', 'step': 14431, 'epoch': 2} {'type': 'loss', 'content': 0.0961541086435318, 'timestamp': '2025-10-01 04:36:46.452021', 'step': 14432, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:36:46.507112', 'step': 14432, 'epoch': 2} {'type': 'loss', 'content': 0.26652711629867554, 'timestamp': '2025-10-01 04:36:46.509947', 'step': 14433, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:46.564863', 'step': 14433, 'epoch': 2} {'type': 'loss', 'content': 0.13847966492176056, 'timestamp': '2025-10-01 04:36:46.566989', 'step': 14434, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:36:46.622012', 'step': 14434, 'epoch': 2} {'type': 'loss', 'content': 0.17376412451267242, 'timestamp': '2025-10-01 04:36:46.624141', 'step': 14435, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:46.678349', 'step': 14435, 'epoch': 2} {'type': 'loss', 'content': 0.10152546316385269, 'timestamp': '2025-10-01 04:36:46.684262', 'step': 14436, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:46.737102', 'step': 14436, 'epoch': 2} {'type': 'loss', 'content': 0.17559370398521423, 'timestamp': '2025-10-01 04:36:46.739332', 'step': 14437, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:46.792679', 'step': 14437, 'epoch': 2} {'type': 'loss', 'content': 0.22514241933822632, 'timestamp': '2025-10-01 04:36:46.794915', 'step': 14438, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:46.848274', 'step': 14438, 'epoch': 2} {'type': 'loss', 'content': 0.10879746079444885, 'timestamp': '2025-10-01 04:36:46.850783', 'step': 14439, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:46.904037', 'step': 14439, 'epoch': 2} {'type': 'loss', 'content': 0.1407967060804367, 'timestamp': '2025-10-01 04:36:46.909779', 'step': 14440, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:46.971264', 'step': 14440, 'epoch': 2} {'type': 'loss', 'content': 0.12586870789527893, 'timestamp': '2025-10-01 04:36:46.973623', 'step': 14441, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:47.026036', 'step': 14441, 'epoch': 2} {'type': 'loss', 'content': 0.05484936386346817, 'timestamp': '2025-10-01 04:36:47.028129', 'step': 14442, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:36:47.081250', 'step': 14442, 'epoch': 2} {'type': 'loss', 'content': 0.17261315882205963, 'timestamp': '2025-10-01 04:36:47.083365', 'step': 14443, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:47.139977', 'step': 14443, 'epoch': 2} {'type': 'loss', 'content': 0.05257619917392731, 'timestamp': '2025-10-01 04:36:47.145883', 'step': 14444, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:36:47.200551', 'step': 14444, 'epoch': 2} {'type': 'loss', 'content': 0.07569045573472977, 'timestamp': '2025-10-01 04:36:47.202875', 'step': 14445, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:36:47.256802', 'step': 14445, 'epoch': 2} {'type': 'loss', 'content': 0.1715962290763855, 'timestamp': '2025-10-01 04:36:47.263005', 'step': 14446, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-01 04:37:00.561153', 'step': 14446, 'epoch': 2} {'type': 'pplx', 'content': 9992.332643582751, 'timestamp': '2025-10-01 04:37:00.564370', 'step': 14446, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:00.619448', 'step': 14446, 'epoch': 2} {'type': 'loss', 'content': 0.1659669578075409, 'timestamp': '2025-10-01 04:37:00.621573', 'step': 14447, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:00.677125', 'step': 14447, 'epoch': 2} {'type': 'loss', 'content': 0.062405530363321304, 'timestamp': '2025-10-01 04:37:00.683665', 'step': 14448, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:00.739643', 'step': 14448, 'epoch': 2} {'type': 'loss', 'content': 0.05352342873811722, 'timestamp': '2025-10-01 04:37:00.742181', 'step': 14449, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:00.800203', 'step': 14449, 'epoch': 2} {'type': 'loss', 'content': 0.153146430850029, 'timestamp': '2025-10-01 04:37:00.803836', 'step': 14450, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:00.861666', 'step': 14450, 'epoch': 2} {'type': 'loss', 'content': 0.17555929720401764, 'timestamp': '2025-10-01 04:37:00.863934', 'step': 14451, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:00.921010', 'step': 14451, 'epoch': 2} {'type': 'loss', 'content': 0.10854282230138779, 'timestamp': '2025-10-01 04:37:00.927575', 'step': 14452, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:00.987918', 'step': 14452, 'epoch': 2} {'type': 'loss', 'content': 0.12519560754299164, 'timestamp': '2025-10-01 04:37:00.990177', 'step': 14453, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:01.043886', 'step': 14453, 'epoch': 2} {'type': 'loss', 'content': 0.07108402252197266, 'timestamp': '2025-10-01 04:37:01.046123', 'step': 14454, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:01.100111', 'step': 14454, 'epoch': 2} {'type': 'loss', 'content': 0.20502959191799164, 'timestamp': '2025-10-01 04:37:01.102186', 'step': 14455, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:01.156958', 'step': 14455, 'epoch': 2} {'type': 'loss', 'content': 0.07611925899982452, 'timestamp': '2025-10-01 04:37:01.162965', 'step': 14456, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:01.215407', 'step': 14456, 'epoch': 2} {'type': 'loss', 'content': 0.09239007532596588, 'timestamp': '2025-10-01 04:37:01.217888', 'step': 14457, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:01.271550', 'step': 14457, 'epoch': 2} {'type': 'loss', 'content': 0.15288902819156647, 'timestamp': '2025-10-01 04:37:01.273631', 'step': 14458, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:01.327412', 'step': 14458, 'epoch': 2} {'type': 'loss', 'content': 0.10009941458702087, 'timestamp': '2025-10-01 04:37:01.329750', 'step': 14459, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:01.382985', 'step': 14459, 'epoch': 2} {'type': 'loss', 'content': 0.20011529326438904, 'timestamp': '2025-10-01 04:37:01.388975', 'step': 14460, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:01.441559', 'step': 14460, 'epoch': 2} {'type': 'loss', 'content': 0.06285763531923294, 'timestamp': '2025-10-01 04:37:01.443644', 'step': 14461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:01.500076', 'step': 14461, 'epoch': 2} {'type': 'loss', 'content': 0.11162212491035461, 'timestamp': '2025-10-01 04:37:01.501967', 'step': 14462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:01.554728', 'step': 14462, 'epoch': 2} {'type': 'loss', 'content': 0.190627321600914, 'timestamp': '2025-10-01 04:37:01.557717', 'step': 14463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:01.618325', 'step': 14463, 'epoch': 2} {'type': 'loss', 'content': 0.12145949900150299, 'timestamp': '2025-10-01 04:37:01.628086', 'step': 14464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:01.685494', 'step': 14464, 'epoch': 2} {'type': 'loss', 'content': 0.10397373884916306, 'timestamp': '2025-10-01 04:37:01.687718', 'step': 14465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:01.742752', 'step': 14465, 'epoch': 2} {'type': 'loss', 'content': 0.09863568842411041, 'timestamp': '2025-10-01 04:37:01.744888', 'step': 14466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:01.807788', 'step': 14466, 'epoch': 2} {'type': 'loss', 'content': 0.11669661104679108, 'timestamp': '2025-10-01 04:37:01.813011', 'step': 14467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:01.866092', 'step': 14467, 'epoch': 2} {'type': 'loss', 'content': 0.08014902472496033, 'timestamp': '2025-10-01 04:37:01.874734', 'step': 14468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:01.933108', 'step': 14468, 'epoch': 2} {'type': 'loss', 'content': 0.13964037597179413, 'timestamp': '2025-10-01 04:37:01.935245', 'step': 14469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:01.988064', 'step': 14469, 'epoch': 2} {'type': 'loss', 'content': 0.0880580022931099, 'timestamp': '2025-10-01 04:37:01.991134', 'step': 14470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:02.047417', 'step': 14470, 'epoch': 2} {'type': 'loss', 'content': 0.08284549415111542, 'timestamp': '2025-10-01 04:37:02.059476', 'step': 14471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:02.113700', 'step': 14471, 'epoch': 2} {'type': 'loss', 'content': 0.1358802765607834, 'timestamp': '2025-10-01 04:37:02.119518', 'step': 14472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:02.172418', 'step': 14472, 'epoch': 2} {'type': 'loss', 'content': 0.1770188808441162, 'timestamp': '2025-10-01 04:37:02.175129', 'step': 14473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:02.228534', 'step': 14473, 'epoch': 2} {'type': 'loss', 'content': 0.2008434683084488, 'timestamp': '2025-10-01 04:37:02.231008', 'step': 14474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:02.284551', 'step': 14474, 'epoch': 2} {'type': 'loss', 'content': 0.128463476896286, 'timestamp': '2025-10-01 04:37:02.287113', 'step': 14475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:02.343500', 'step': 14475, 'epoch': 2} {'type': 'loss', 'content': 0.1765012890100479, 'timestamp': '2025-10-01 04:37:02.351039', 'step': 14476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:02.406406', 'step': 14476, 'epoch': 2} {'type': 'loss', 'content': 0.18080371618270874, 'timestamp': '2025-10-01 04:37:02.409164', 'step': 14477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:02.473786', 'step': 14477, 'epoch': 2} {'type': 'loss', 'content': 0.1763351559638977, 'timestamp': '2025-10-01 04:37:02.475939', 'step': 14478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:02.530222', 'step': 14478, 'epoch': 2} {'type': 'loss', 'content': 0.14372259378433228, 'timestamp': '2025-10-01 04:37:02.532483', 'step': 14479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:02.592319', 'step': 14479, 'epoch': 2} {'type': 'loss', 'content': 0.055198896676301956, 'timestamp': '2025-10-01 04:37:02.599435', 'step': 14480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:02.652575', 'step': 14480, 'epoch': 2} {'type': 'loss', 'content': 0.14093905687332153, 'timestamp': '2025-10-01 04:37:02.654717', 'step': 14481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:02.708800', 'step': 14481, 'epoch': 2} {'type': 'loss', 'content': 0.09465227276086807, 'timestamp': '2025-10-01 04:37:02.711251', 'step': 14482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:02.771637', 'step': 14482, 'epoch': 2} {'type': 'loss', 'content': 0.10322000831365585, 'timestamp': '2025-10-01 04:37:02.774007', 'step': 14483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:02.833287', 'step': 14483, 'epoch': 2} {'type': 'loss', 'content': 0.05693313479423523, 'timestamp': '2025-10-01 04:37:02.839587', 'step': 14484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:02.893522', 'step': 14484, 'epoch': 2} {'type': 'loss', 'content': 0.15537290275096893, 'timestamp': '2025-10-01 04:37:02.895590', 'step': 14485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:02.948670', 'step': 14485, 'epoch': 2} {'type': 'loss', 'content': 0.1811191439628601, 'timestamp': '2025-10-01 04:37:02.950859', 'step': 14486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:03.005599', 'step': 14486, 'epoch': 2} {'type': 'loss', 'content': 0.11329609155654907, 'timestamp': '2025-10-01 04:37:03.007780', 'step': 14487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:03.061269', 'step': 14487, 'epoch': 2} {'type': 'loss', 'content': 0.09749046713113785, 'timestamp': '2025-10-01 04:37:03.072065', 'step': 14488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:37:03.125004', 'step': 14488, 'epoch': 2} {'type': 'loss', 'content': 0.07836516201496124, 'timestamp': '2025-10-01 04:37:03.128782', 'step': 14489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:03.182159', 'step': 14489, 'epoch': 2} {'type': 'loss', 'content': 0.12316344678401947, 'timestamp': '2025-10-01 04:37:03.184254', 'step': 14490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:03.237883', 'step': 14490, 'epoch': 2} {'type': 'loss', 'content': 0.0417708158493042, 'timestamp': '2025-10-01 04:37:03.240384', 'step': 14491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:03.294190', 'step': 14491, 'epoch': 2} {'type': 'loss', 'content': 0.06137188896536827, 'timestamp': '2025-10-01 04:37:03.300251', 'step': 14492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:03.353112', 'step': 14492, 'epoch': 2} {'type': 'loss', 'content': 0.11320114880800247, 'timestamp': '2025-10-01 04:37:03.355338', 'step': 14493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:03.410467', 'step': 14493, 'epoch': 2} {'type': 'loss', 'content': 0.14787915349006653, 'timestamp': '2025-10-01 04:37:03.412310', 'step': 14494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:03.465000', 'step': 14494, 'epoch': 2} {'type': 'loss', 'content': 0.08826645463705063, 'timestamp': '2025-10-01 04:37:03.468139', 'step': 14495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:03.521061', 'step': 14495, 'epoch': 2} {'type': 'loss', 'content': 0.08820760250091553, 'timestamp': '2025-10-01 04:37:03.527091', 'step': 14496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:37:03.581071', 'step': 14496, 'epoch': 2} {'type': 'loss', 'content': 0.1096070185303688, 'timestamp': '2025-10-01 04:37:03.583612', 'step': 14497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:03.637900', 'step': 14497, 'epoch': 2} {'type': 'loss', 'content': 0.10066674649715424, 'timestamp': '2025-10-01 04:37:03.640136', 'step': 14498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:03.694368', 'step': 14498, 'epoch': 2} {'type': 'loss', 'content': 0.12699590623378754, 'timestamp': '2025-10-01 04:37:03.696604', 'step': 14499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:03.750699', 'step': 14499, 'epoch': 2} {'type': 'loss', 'content': 0.12411390990018845, 'timestamp': '2025-10-01 04:37:03.756823', 'step': 14500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 14500', 'timestamp': '2025-10-01 04:37:04.299201', 'step': 14500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:04.358659', 'step': 14500, 'epoch': 2} {'type': 'loss', 'content': 0.09338364005088806, 'timestamp': '2025-10-01 04:37:04.360820', 'step': 14501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:04.417591', 'step': 14501, 'epoch': 2} {'type': 'loss', 'content': 0.18755877017974854, 'timestamp': '2025-10-01 04:37:04.419588', 'step': 14502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:04.473247', 'step': 14502, 'epoch': 2} {'type': 'loss', 'content': 0.0943433865904808, 'timestamp': '2025-10-01 04:37:04.475518', 'step': 14503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:04.529888', 'step': 14503, 'epoch': 2} {'type': 'loss', 'content': 0.10056713223457336, 'timestamp': '2025-10-01 04:37:04.537530', 'step': 14504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:04.591914', 'step': 14504, 'epoch': 2} {'type': 'loss', 'content': 0.09095626324415207, 'timestamp': '2025-10-01 04:37:04.594168', 'step': 14505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:04.649211', 'step': 14505, 'epoch': 2} {'type': 'loss', 'content': 0.0791865810751915, 'timestamp': '2025-10-01 04:37:04.651912', 'step': 14506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:04.705682', 'step': 14506, 'epoch': 2} {'type': 'loss', 'content': 0.1682307869195938, 'timestamp': '2025-10-01 04:37:04.707821', 'step': 14507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:04.777210', 'step': 14507, 'epoch': 2} {'type': 'loss', 'content': 0.13587099313735962, 'timestamp': '2025-10-01 04:37:04.783992', 'step': 14508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:04.838076', 'step': 14508, 'epoch': 2} {'type': 'loss', 'content': 0.0906088575720787, 'timestamp': '2025-10-01 04:37:04.840454', 'step': 14509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:04.896827', 'step': 14509, 'epoch': 2} {'type': 'loss', 'content': 0.08974840492010117, 'timestamp': '2025-10-01 04:37:04.899416', 'step': 14510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:04.954785', 'step': 14510, 'epoch': 2} {'type': 'loss', 'content': 0.16668196022510529, 'timestamp': '2025-10-01 04:37:04.957490', 'step': 14511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:05.012347', 'step': 14511, 'epoch': 2} {'type': 'loss', 'content': 0.17781056463718414, 'timestamp': '2025-10-01 04:37:05.018521', 'step': 14512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:05.079577', 'step': 14512, 'epoch': 2} {'type': 'loss', 'content': 0.10753633826971054, 'timestamp': '2025-10-01 04:37:05.082164', 'step': 14513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:05.137290', 'step': 14513, 'epoch': 2} {'type': 'loss', 'content': 0.13204191625118256, 'timestamp': '2025-10-01 04:37:05.139655', 'step': 14514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:05.194441', 'step': 14514, 'epoch': 2} {'type': 'loss', 'content': 0.13759377598762512, 'timestamp': '2025-10-01 04:37:05.196819', 'step': 14515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:05.250828', 'step': 14515, 'epoch': 2} {'type': 'loss', 'content': 0.0961233600974083, 'timestamp': '2025-10-01 04:37:05.257010', 'step': 14516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:05.310625', 'step': 14516, 'epoch': 2} {'type': 'loss', 'content': 0.1494421809911728, 'timestamp': '2025-10-01 04:37:05.313137', 'step': 14517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:05.367454', 'step': 14517, 'epoch': 2} {'type': 'loss', 'content': 0.18868716061115265, 'timestamp': '2025-10-01 04:37:05.369819', 'step': 14518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:37:05.425187', 'step': 14518, 'epoch': 2} {'type': 'loss', 'content': 0.2091558575630188, 'timestamp': '2025-10-01 04:37:05.427935', 'step': 14519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:37:05.487053', 'step': 14519, 'epoch': 2} {'type': 'loss', 'content': 0.24900850653648376, 'timestamp': '2025-10-01 04:37:05.493914', 'step': 14520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:05.550852', 'step': 14520, 'epoch': 2} {'type': 'loss', 'content': 0.10965798795223236, 'timestamp': '2025-10-01 04:37:05.553415', 'step': 14521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:05.610306', 'step': 14521, 'epoch': 2} {'type': 'loss', 'content': 0.19949671626091003, 'timestamp': '2025-10-01 04:37:05.612877', 'step': 14522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:05.669946', 'step': 14522, 'epoch': 2} {'type': 'loss', 'content': 0.17700357735157013, 'timestamp': '2025-10-01 04:37:05.672440', 'step': 14523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:05.760734', 'step': 14523, 'epoch': 2} {'type': 'loss', 'content': 0.17131216824054718, 'timestamp': '2025-10-01 04:37:05.768922', 'step': 14524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:05.834942', 'step': 14524, 'epoch': 2} {'type': 'loss', 'content': 0.10935446619987488, 'timestamp': '2025-10-01 04:37:05.837768', 'step': 14525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:05.894188', 'step': 14525, 'epoch': 2} {'type': 'loss', 'content': 0.1701943427324295, 'timestamp': '2025-10-01 04:37:05.896498', 'step': 14526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:37:05.958062', 'step': 14526, 'epoch': 2} {'type': 'loss', 'content': 0.1603699028491974, 'timestamp': '2025-10-01 04:37:05.961673', 'step': 14527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:06.018323', 'step': 14527, 'epoch': 2} {'type': 'loss', 'content': 0.0657157152891159, 'timestamp': '2025-10-01 04:37:06.025825', 'step': 14528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:06.082988', 'step': 14528, 'epoch': 2} {'type': 'loss', 'content': 0.09447559714317322, 'timestamp': '2025-10-01 04:37:06.089279', 'step': 14529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:06.149227', 'step': 14529, 'epoch': 2} {'type': 'loss', 'content': 0.0879591554403305, 'timestamp': '2025-10-01 04:37:06.151916', 'step': 14530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:06.215748', 'step': 14530, 'epoch': 2} {'type': 'loss', 'content': 0.11569653451442719, 'timestamp': '2025-10-01 04:37:06.221310', 'step': 14531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:06.294651', 'step': 14531, 'epoch': 2} {'type': 'loss', 'content': 0.11399544030427933, 'timestamp': '2025-10-01 04:37:06.313618', 'step': 14532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:06.367945', 'step': 14532, 'epoch': 2} {'type': 'loss', 'content': 0.07971562445163727, 'timestamp': '2025-10-01 04:37:06.371092', 'step': 14533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:06.426919', 'step': 14533, 'epoch': 2} {'type': 'loss', 'content': 0.12286796420812607, 'timestamp': '2025-10-01 04:37:06.429128', 'step': 14534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:06.483517', 'step': 14534, 'epoch': 2} {'type': 'loss', 'content': 0.08984966576099396, 'timestamp': '2025-10-01 04:37:06.486040', 'step': 14535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:06.543079', 'step': 14535, 'epoch': 2} {'type': 'loss', 'content': 0.08693604916334152, 'timestamp': '2025-10-01 04:37:06.548552', 'step': 14536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:06.606999', 'step': 14536, 'epoch': 2} {'type': 'loss', 'content': 0.0667228102684021, 'timestamp': '2025-10-01 04:37:06.609735', 'step': 14537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:37:06.666943', 'step': 14537, 'epoch': 2} {'type': 'loss', 'content': 0.15670226514339447, 'timestamp': '2025-10-01 04:37:06.669431', 'step': 14538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:06.726485', 'step': 14538, 'epoch': 2} {'type': 'loss', 'content': 0.12728260457515717, 'timestamp': '2025-10-01 04:37:06.728944', 'step': 14539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:06.784749', 'step': 14539, 'epoch': 2} {'type': 'loss', 'content': 0.06024422496557236, 'timestamp': '2025-10-01 04:37:06.795158', 'step': 14540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:06.848390', 'step': 14540, 'epoch': 2} {'type': 'loss', 'content': 0.1421455293893814, 'timestamp': '2025-10-01 04:37:06.850377', 'step': 14541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:06.903477', 'step': 14541, 'epoch': 2} {'type': 'loss', 'content': 0.13160555064678192, 'timestamp': '2025-10-01 04:37:06.905810', 'step': 14542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:06.959431', 'step': 14542, 'epoch': 2} {'type': 'loss', 'content': 0.08696220070123672, 'timestamp': '2025-10-01 04:37:06.961472', 'step': 14543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:07.014580', 'step': 14543, 'epoch': 2} {'type': 'loss', 'content': 0.11768912523984909, 'timestamp': '2025-10-01 04:37:07.020342', 'step': 14544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:07.074754', 'step': 14544, 'epoch': 2} {'type': 'loss', 'content': 0.14931638538837433, 'timestamp': '2025-10-01 04:37:07.078289', 'step': 14545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:07.132865', 'step': 14545, 'epoch': 2} {'type': 'loss', 'content': 0.10954007506370544, 'timestamp': '2025-10-01 04:37:07.135545', 'step': 14546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:07.189824', 'step': 14546, 'epoch': 2} {'type': 'loss', 'content': 0.1552528440952301, 'timestamp': '2025-10-01 04:37:07.192126', 'step': 14547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:07.245392', 'step': 14547, 'epoch': 2} {'type': 'loss', 'content': 0.11137383431196213, 'timestamp': '2025-10-01 04:37:07.251163', 'step': 14548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:07.305350', 'step': 14548, 'epoch': 2} {'type': 'loss', 'content': 0.08818621933460236, 'timestamp': '2025-10-01 04:37:07.307625', 'step': 14549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:07.361138', 'step': 14549, 'epoch': 2} {'type': 'loss', 'content': 0.01964006945490837, 'timestamp': '2025-10-01 04:37:07.363397', 'step': 14550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:07.417592', 'step': 14550, 'epoch': 2} {'type': 'loss', 'content': 0.10629842430353165, 'timestamp': '2025-10-01 04:37:07.420442', 'step': 14551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:07.474268', 'step': 14551, 'epoch': 2} {'type': 'loss', 'content': 0.06436122208833694, 'timestamp': '2025-10-01 04:37:07.480033', 'step': 14552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:37:07.532523', 'step': 14552, 'epoch': 2} {'type': 'loss', 'content': 0.07729922235012054, 'timestamp': '2025-10-01 04:37:07.534886', 'step': 14553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:07.589368', 'step': 14553, 'epoch': 2} {'type': 'loss', 'content': 0.0926896184682846, 'timestamp': '2025-10-01 04:37:07.591664', 'step': 14554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:07.649063', 'step': 14554, 'epoch': 2} {'type': 'loss', 'content': 0.20960934460163116, 'timestamp': '2025-10-01 04:37:07.652694', 'step': 14555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:07.705788', 'step': 14555, 'epoch': 2} {'type': 'loss', 'content': 0.12821783125400543, 'timestamp': '2025-10-01 04:37:07.712182', 'step': 14556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:07.766478', 'step': 14556, 'epoch': 2} {'type': 'loss', 'content': 0.08872505277395248, 'timestamp': '2025-10-01 04:37:07.768706', 'step': 14557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:07.823618', 'step': 14557, 'epoch': 2} {'type': 'loss', 'content': 0.13867934048175812, 'timestamp': '2025-10-01 04:37:07.826888', 'step': 14558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:07.880604', 'step': 14558, 'epoch': 2} {'type': 'loss', 'content': 0.07277846336364746, 'timestamp': '2025-10-01 04:37:07.882834', 'step': 14559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:07.936443', 'step': 14559, 'epoch': 2} {'type': 'loss', 'content': 0.1177341490983963, 'timestamp': '2025-10-01 04:37:07.942733', 'step': 14560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:07.995503', 'step': 14560, 'epoch': 2} {'type': 'loss', 'content': 0.1312560886144638, 'timestamp': '2025-10-01 04:37:07.998328', 'step': 14561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:08.051531', 'step': 14561, 'epoch': 2} {'type': 'loss', 'content': 0.18763135373592377, 'timestamp': '2025-10-01 04:37:08.053662', 'step': 14562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:08.106801', 'step': 14562, 'epoch': 2} {'type': 'loss', 'content': 0.10806061327457428, 'timestamp': '2025-10-01 04:37:08.108917', 'step': 14563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:37:08.163484', 'step': 14563, 'epoch': 2} {'type': 'loss', 'content': 0.20703913271427155, 'timestamp': '2025-10-01 04:37:08.169501', 'step': 14564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:08.223049', 'step': 14564, 'epoch': 2} {'type': 'loss', 'content': 0.06916696578264236, 'timestamp': '2025-10-01 04:37:08.226560', 'step': 14565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:08.280110', 'step': 14565, 'epoch': 2} {'type': 'loss', 'content': 0.1341155469417572, 'timestamp': '2025-10-01 04:37:08.282494', 'step': 14566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:08.354890', 'step': 14566, 'epoch': 2} {'type': 'loss', 'content': 0.06204495579004288, 'timestamp': '2025-10-01 04:37:08.358844', 'step': 14567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:37:08.414592', 'step': 14567, 'epoch': 2} {'type': 'loss', 'content': 0.09662153571844101, 'timestamp': '2025-10-01 04:37:08.420568', 'step': 14568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:08.473520', 'step': 14568, 'epoch': 2} {'type': 'loss', 'content': 0.06681102514266968, 'timestamp': '2025-10-01 04:37:08.475774', 'step': 14569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:08.528936', 'step': 14569, 'epoch': 2} {'type': 'loss', 'content': 0.11308777332305908, 'timestamp': '2025-10-01 04:37:08.530974', 'step': 14570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:08.584107', 'step': 14570, 'epoch': 2} {'type': 'loss', 'content': 0.12189526855945587, 'timestamp': '2025-10-01 04:37:08.586438', 'step': 14571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:08.639598', 'step': 14571, 'epoch': 2} {'type': 'loss', 'content': 0.16094322502613068, 'timestamp': '2025-10-01 04:37:08.645381', 'step': 14572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:08.697640', 'step': 14572, 'epoch': 2} {'type': 'loss', 'content': 0.09197431802749634, 'timestamp': '2025-10-01 04:37:08.699890', 'step': 14573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:08.754114', 'step': 14573, 'epoch': 2} {'type': 'loss', 'content': 0.07069828361272812, 'timestamp': '2025-10-01 04:37:08.756176', 'step': 14574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:08.810233', 'step': 14574, 'epoch': 2} {'type': 'loss', 'content': 0.12125199288129807, 'timestamp': '2025-10-01 04:37:08.812450', 'step': 14575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:08.865712', 'step': 14575, 'epoch': 2} {'type': 'loss', 'content': 0.05881518870592117, 'timestamp': '2025-10-01 04:37:08.871526', 'step': 14576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:08.925597', 'step': 14576, 'epoch': 2} {'type': 'loss', 'content': 0.1383199244737625, 'timestamp': '2025-10-01 04:37:08.928358', 'step': 14577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:08.981379', 'step': 14577, 'epoch': 2} {'type': 'loss', 'content': 0.22722141444683075, 'timestamp': '2025-10-01 04:37:08.984079', 'step': 14578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:09.037607', 'step': 14578, 'epoch': 2} {'type': 'loss', 'content': 0.05731434002518654, 'timestamp': '2025-10-01 04:37:09.039826', 'step': 14579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:09.095162', 'step': 14579, 'epoch': 2} {'type': 'loss', 'content': 0.09835483133792877, 'timestamp': '2025-10-01 04:37:09.100858', 'step': 14580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:09.154432', 'step': 14580, 'epoch': 2} {'type': 'loss', 'content': 0.15863041579723358, 'timestamp': '2025-10-01 04:37:09.156338', 'step': 14581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:09.208944', 'step': 14581, 'epoch': 2} {'type': 'loss', 'content': 0.0745750442147255, 'timestamp': '2025-10-01 04:37:09.211220', 'step': 14582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:09.265171', 'step': 14582, 'epoch': 2} {'type': 'loss', 'content': 0.08836697787046432, 'timestamp': '2025-10-01 04:37:09.267437', 'step': 14583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:09.321252', 'step': 14583, 'epoch': 2} {'type': 'loss', 'content': 0.0895933210849762, 'timestamp': '2025-10-01 04:37:09.326891', 'step': 14584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:09.379698', 'step': 14584, 'epoch': 2} {'type': 'loss', 'content': 0.07729997485876083, 'timestamp': '2025-10-01 04:37:09.381862', 'step': 14585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:09.434979', 'step': 14585, 'epoch': 2} {'type': 'loss', 'content': 0.06967423856258392, 'timestamp': '2025-10-01 04:37:09.437106', 'step': 14586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:09.490360', 'step': 14586, 'epoch': 2} {'type': 'loss', 'content': 0.09077096730470657, 'timestamp': '2025-10-01 04:37:09.492821', 'step': 14587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:09.556022', 'step': 14587, 'epoch': 2} {'type': 'loss', 'content': 0.18097630143165588, 'timestamp': '2025-10-01 04:37:09.562523', 'step': 14588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:09.615223', 'step': 14588, 'epoch': 2} {'type': 'loss', 'content': 0.10649724304676056, 'timestamp': '2025-10-01 04:37:09.617348', 'step': 14589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:09.670668', 'step': 14589, 'epoch': 2} {'type': 'loss', 'content': 0.17180196940898895, 'timestamp': '2025-10-01 04:37:09.672839', 'step': 14590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:09.727129', 'step': 14590, 'epoch': 2} {'type': 'loss', 'content': 0.08171779662370682, 'timestamp': '2025-10-01 04:37:09.729231', 'step': 14591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:09.783195', 'step': 14591, 'epoch': 2} {'type': 'loss', 'content': 0.1171213835477829, 'timestamp': '2025-10-01 04:37:09.789586', 'step': 14592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:09.842536', 'step': 14592, 'epoch': 2} {'type': 'loss', 'content': 0.11372864246368408, 'timestamp': '2025-10-01 04:37:09.844679', 'step': 14593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:09.898283', 'step': 14593, 'epoch': 2} {'type': 'loss', 'content': 0.0678539052605629, 'timestamp': '2025-10-01 04:37:09.900461', 'step': 14594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:09.954681', 'step': 14594, 'epoch': 2} {'type': 'loss', 'content': 0.1120658740401268, 'timestamp': '2025-10-01 04:37:09.960366', 'step': 14595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:10.023206', 'step': 14595, 'epoch': 2} {'type': 'loss', 'content': 0.12128666788339615, 'timestamp': '2025-10-01 04:37:10.029159', 'step': 14596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:10.089805', 'step': 14596, 'epoch': 2} {'type': 'loss', 'content': 0.06684868037700653, 'timestamp': '2025-10-01 04:37:10.092133', 'step': 14597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:10.146859', 'step': 14597, 'epoch': 2} {'type': 'loss', 'content': 0.06599398702383041, 'timestamp': '2025-10-01 04:37:10.148960', 'step': 14598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:37:10.202819', 'step': 14598, 'epoch': 2} {'type': 'loss', 'content': 0.04441062733530998, 'timestamp': '2025-10-01 04:37:10.207523', 'step': 14599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:10.262604', 'step': 14599, 'epoch': 2} {'type': 'loss', 'content': 0.07208438962697983, 'timestamp': '2025-10-01 04:37:10.268554', 'step': 14600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:10.325794', 'step': 14600, 'epoch': 2} {'type': 'loss', 'content': 0.09473076462745667, 'timestamp': '2025-10-01 04:37:10.327929', 'step': 14601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:10.382391', 'step': 14601, 'epoch': 2} {'type': 'loss', 'content': 0.061719637364149094, 'timestamp': '2025-10-01 04:37:10.384571', 'step': 14602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:10.445233', 'step': 14602, 'epoch': 2} {'type': 'loss', 'content': 0.13343545794487, 'timestamp': '2025-10-01 04:37:10.449451', 'step': 14603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:10.510525', 'step': 14603, 'epoch': 2} {'type': 'loss', 'content': 0.13452814519405365, 'timestamp': '2025-10-01 04:37:10.517233', 'step': 14604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:10.569787', 'step': 14604, 'epoch': 2} {'type': 'loss', 'content': 0.07459961622953415, 'timestamp': '2025-10-01 04:37:10.572130', 'step': 14605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:10.625639', 'step': 14605, 'epoch': 2} {'type': 'loss', 'content': 0.1469009965658188, 'timestamp': '2025-10-01 04:37:10.627768', 'step': 14606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:10.681117', 'step': 14606, 'epoch': 2} {'type': 'loss', 'content': 0.0615423284471035, 'timestamp': '2025-10-01 04:37:10.683351', 'step': 14607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:10.736860', 'step': 14607, 'epoch': 2} {'type': 'loss', 'content': 0.10244233161211014, 'timestamp': '2025-10-01 04:37:10.742588', 'step': 14608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:10.795858', 'step': 14608, 'epoch': 2} {'type': 'loss', 'content': 0.1291637271642685, 'timestamp': '2025-10-01 04:37:10.798029', 'step': 14609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:10.851808', 'step': 14609, 'epoch': 2} {'type': 'loss', 'content': 0.10114980489015579, 'timestamp': '2025-10-01 04:37:10.855331', 'step': 14610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:10.917137', 'step': 14610, 'epoch': 2} {'type': 'loss', 'content': 0.10811033099889755, 'timestamp': '2025-10-01 04:37:10.919435', 'step': 14611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:10.973748', 'step': 14611, 'epoch': 2} {'type': 'loss', 'content': 0.14885273575782776, 'timestamp': '2025-10-01 04:37:10.979612', 'step': 14612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:11.033117', 'step': 14612, 'epoch': 2} {'type': 'loss', 'content': 0.07273883372545242, 'timestamp': '2025-10-01 04:37:11.037072', 'step': 14613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:11.090913', 'step': 14613, 'epoch': 2} {'type': 'loss', 'content': 0.10519631206989288, 'timestamp': '2025-10-01 04:37:11.093066', 'step': 14614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:11.146247', 'step': 14614, 'epoch': 2} {'type': 'loss', 'content': 0.07073397934436798, 'timestamp': '2025-10-01 04:37:11.148533', 'step': 14615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:11.201623', 'step': 14615, 'epoch': 2} {'type': 'loss', 'content': 0.08324655890464783, 'timestamp': '2025-10-01 04:37:11.207280', 'step': 14616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:11.259932', 'step': 14616, 'epoch': 2} {'type': 'loss', 'content': 0.07502968609333038, 'timestamp': '2025-10-01 04:37:11.262133', 'step': 14617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:37:11.315221', 'step': 14617, 'epoch': 2} {'type': 'loss', 'content': 0.13310658931732178, 'timestamp': '2025-10-01 04:37:11.317144', 'step': 14618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:37:11.370243', 'step': 14618, 'epoch': 2} {'type': 'loss', 'content': 0.1943957656621933, 'timestamp': '2025-10-01 04:37:11.380097', 'step': 14619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:11.437175', 'step': 14619, 'epoch': 2} {'type': 'loss', 'content': 0.04781022667884827, 'timestamp': '2025-10-01 04:37:11.443559', 'step': 14620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:11.516048', 'step': 14620, 'epoch': 2} {'type': 'loss', 'content': 0.21910253167152405, 'timestamp': '2025-10-01 04:37:11.518178', 'step': 14621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:11.574012', 'step': 14621, 'epoch': 2} {'type': 'loss', 'content': 0.12126278132200241, 'timestamp': '2025-10-01 04:37:11.576284', 'step': 14622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:11.629712', 'step': 14622, 'epoch': 2} {'type': 'loss', 'content': 0.13760460913181305, 'timestamp': '2025-10-01 04:37:11.631762', 'step': 14623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:11.685180', 'step': 14623, 'epoch': 2} {'type': 'loss', 'content': 0.12251371145248413, 'timestamp': '2025-10-01 04:37:11.691791', 'step': 14624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:11.745009', 'step': 14624, 'epoch': 2} {'type': 'loss', 'content': 0.10104963928461075, 'timestamp': '2025-10-01 04:37:11.749395', 'step': 14625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:11.804175', 'step': 14625, 'epoch': 2} {'type': 'loss', 'content': 0.11546220630407333, 'timestamp': '2025-10-01 04:37:11.806571', 'step': 14626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:11.866577', 'step': 14626, 'epoch': 2} {'type': 'loss', 'content': 0.24174152314662933, 'timestamp': '2025-10-01 04:37:11.882603', 'step': 14627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:37:11.939055', 'step': 14627, 'epoch': 2} {'type': 'loss', 'content': 0.06792014837265015, 'timestamp': '2025-10-01 04:37:11.944679', 'step': 14628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:11.998942', 'step': 14628, 'epoch': 2} {'type': 'loss', 'content': 0.14337807893753052, 'timestamp': '2025-10-01 04:37:12.001116', 'step': 14629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:37:12.054267', 'step': 14629, 'epoch': 2} {'type': 'loss', 'content': 0.08114240318536758, 'timestamp': '2025-10-01 04:37:12.056901', 'step': 14630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:12.110231', 'step': 14630, 'epoch': 2} {'type': 'loss', 'content': 0.1313299536705017, 'timestamp': '2025-10-01 04:37:12.112431', 'step': 14631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:12.165575', 'step': 14631, 'epoch': 2} {'type': 'loss', 'content': 0.13877424597740173, 'timestamp': '2025-10-01 04:37:12.171376', 'step': 14632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:12.224687', 'step': 14632, 'epoch': 2} {'type': 'loss', 'content': 0.10792495310306549, 'timestamp': '2025-10-01 04:37:12.226797', 'step': 14633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:12.280459', 'step': 14633, 'epoch': 2} {'type': 'loss', 'content': 0.10063139349222183, 'timestamp': '2025-10-01 04:37:12.282670', 'step': 14634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:12.338919', 'step': 14634, 'epoch': 2} {'type': 'loss', 'content': 0.07526102662086487, 'timestamp': '2025-10-01 04:37:12.341238', 'step': 14635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:12.395396', 'step': 14635, 'epoch': 2} {'type': 'loss', 'content': 0.12286389619112015, 'timestamp': '2025-10-01 04:37:12.401319', 'step': 14636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:12.454557', 'step': 14636, 'epoch': 2} {'type': 'loss', 'content': 0.14924836158752441, 'timestamp': '2025-10-01 04:37:12.456886', 'step': 14637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:12.510270', 'step': 14637, 'epoch': 2} {'type': 'loss', 'content': 0.09202327579259872, 'timestamp': '2025-10-01 04:37:12.512411', 'step': 14638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:12.566224', 'step': 14638, 'epoch': 2} {'type': 'loss', 'content': 0.07031083106994629, 'timestamp': '2025-10-01 04:37:12.569312', 'step': 14639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:12.622290', 'step': 14639, 'epoch': 2} {'type': 'loss', 'content': 0.12359301745891571, 'timestamp': '2025-10-01 04:37:12.628093', 'step': 14640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:12.681186', 'step': 14640, 'epoch': 2} {'type': 'loss', 'content': 0.18571393191814423, 'timestamp': '2025-10-01 04:37:12.683096', 'step': 14641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:12.736751', 'step': 14641, 'epoch': 2} {'type': 'loss', 'content': 0.11503871530294418, 'timestamp': '2025-10-01 04:37:12.739411', 'step': 14642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:12.810986', 'step': 14642, 'epoch': 2} {'type': 'loss', 'content': 0.08833089470863342, 'timestamp': '2025-10-01 04:37:12.813184', 'step': 14643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:12.867784', 'step': 14643, 'epoch': 2} {'type': 'loss', 'content': 0.1635819375514984, 'timestamp': '2025-10-01 04:37:12.875595', 'step': 14644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:12.928868', 'step': 14644, 'epoch': 2} {'type': 'loss', 'content': 0.17891089618206024, 'timestamp': '2025-10-01 04:37:12.931352', 'step': 14645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:12.985334', 'step': 14645, 'epoch': 2} {'type': 'loss', 'content': 0.09298232942819595, 'timestamp': '2025-10-01 04:37:12.987582', 'step': 14646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:13.041815', 'step': 14646, 'epoch': 2} {'type': 'loss', 'content': 0.1609492003917694, 'timestamp': '2025-10-01 04:37:13.044087', 'step': 14647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:13.099695', 'step': 14647, 'epoch': 2} {'type': 'loss', 'content': 0.1062631607055664, 'timestamp': '2025-10-01 04:37:13.105878', 'step': 14648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:13.160895', 'step': 14648, 'epoch': 2} {'type': 'loss', 'content': 0.16477486491203308, 'timestamp': '2025-10-01 04:37:13.163136', 'step': 14649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:13.217273', 'step': 14649, 'epoch': 2} {'type': 'loss', 'content': 0.08783979713916779, 'timestamp': '2025-10-01 04:37:13.219338', 'step': 14650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:13.273180', 'step': 14650, 'epoch': 2} {'type': 'loss', 'content': 0.14764566719532013, 'timestamp': '2025-10-01 04:37:13.275236', 'step': 14651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:13.328995', 'step': 14651, 'epoch': 2} {'type': 'loss', 'content': 0.15999600291252136, 'timestamp': '2025-10-01 04:37:13.335108', 'step': 14652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:13.389665', 'step': 14652, 'epoch': 2} {'type': 'loss', 'content': 0.16459186375141144, 'timestamp': '2025-10-01 04:37:13.392819', 'step': 14653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:13.446334', 'step': 14653, 'epoch': 2} {'type': 'loss', 'content': 0.13332423567771912, 'timestamp': '2025-10-01 04:37:13.449173', 'step': 14654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:13.504217', 'step': 14654, 'epoch': 2} {'type': 'loss', 'content': 0.16565071046352386, 'timestamp': '2025-10-01 04:37:13.506435', 'step': 14655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:37:13.559296', 'step': 14655, 'epoch': 2} {'type': 'loss', 'content': 0.14235633611679077, 'timestamp': '2025-10-01 04:37:13.565504', 'step': 14656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:13.618929', 'step': 14656, 'epoch': 2} {'type': 'loss', 'content': 0.16421206295490265, 'timestamp': '2025-10-01 04:37:13.621263', 'step': 14657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:13.674932', 'step': 14657, 'epoch': 2} {'type': 'loss', 'content': 0.054794441908597946, 'timestamp': '2025-10-01 04:37:13.677046', 'step': 14658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:13.730681', 'step': 14658, 'epoch': 2} {'type': 'loss', 'content': 0.1142096146941185, 'timestamp': '2025-10-01 04:37:13.733319', 'step': 14659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:13.786858', 'step': 14659, 'epoch': 2} {'type': 'loss', 'content': 0.07413282245397568, 'timestamp': '2025-10-01 04:37:13.792776', 'step': 14660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:13.845876', 'step': 14660, 'epoch': 2} {'type': 'loss', 'content': 0.0597858726978302, 'timestamp': '2025-10-01 04:37:13.848048', 'step': 14661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:13.902274', 'step': 14661, 'epoch': 2} {'type': 'loss', 'content': 0.09938391298055649, 'timestamp': '2025-10-01 04:37:13.904786', 'step': 14662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:13.959681', 'step': 14662, 'epoch': 2} {'type': 'loss', 'content': 0.2113572508096695, 'timestamp': '2025-10-01 04:37:13.962032', 'step': 14663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:14.017152', 'step': 14663, 'epoch': 2} {'type': 'loss', 'content': 0.16428826749324799, 'timestamp': '2025-10-01 04:37:14.023391', 'step': 14664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:14.077498', 'step': 14664, 'epoch': 2} {'type': 'loss', 'content': 0.1188209280371666, 'timestamp': '2025-10-01 04:37:14.080775', 'step': 14665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:14.134929', 'step': 14665, 'epoch': 2} {'type': 'loss', 'content': 0.10394653677940369, 'timestamp': '2025-10-01 04:37:14.137632', 'step': 14666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:14.191825', 'step': 14666, 'epoch': 2} {'type': 'loss', 'content': 0.18854500353336334, 'timestamp': '2025-10-01 04:37:14.194427', 'step': 14667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:14.248416', 'step': 14667, 'epoch': 2} {'type': 'loss', 'content': 0.12792262434959412, 'timestamp': '2025-10-01 04:37:14.254640', 'step': 14668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:14.308635', 'step': 14668, 'epoch': 2} {'type': 'loss', 'content': 0.11470087617635727, 'timestamp': '2025-10-01 04:37:14.311315', 'step': 14669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:37:14.367157', 'step': 14669, 'epoch': 2} {'type': 'loss', 'content': 0.11594714224338531, 'timestamp': '2025-10-01 04:37:14.369390', 'step': 14670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:14.423985', 'step': 14670, 'epoch': 2} {'type': 'loss', 'content': 0.06328042596578598, 'timestamp': '2025-10-01 04:37:14.427367', 'step': 14671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:14.481411', 'step': 14671, 'epoch': 2} {'type': 'loss', 'content': 0.07375497370958328, 'timestamp': '2025-10-01 04:37:14.488068', 'step': 14672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:14.541624', 'step': 14672, 'epoch': 2} {'type': 'loss', 'content': 0.0937519371509552, 'timestamp': '2025-10-01 04:37:14.544121', 'step': 14673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:14.599132', 'step': 14673, 'epoch': 2} {'type': 'loss', 'content': 0.1363801658153534, 'timestamp': '2025-10-01 04:37:14.601544', 'step': 14674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:14.661002', 'step': 14674, 'epoch': 2} {'type': 'loss', 'content': 0.08687766641378403, 'timestamp': '2025-10-01 04:37:14.663469', 'step': 14675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:14.718113', 'step': 14675, 'epoch': 2} {'type': 'loss', 'content': 0.07982821017503738, 'timestamp': '2025-10-01 04:37:14.724781', 'step': 14676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:14.781177', 'step': 14676, 'epoch': 2} {'type': 'loss', 'content': 0.12054091691970825, 'timestamp': '2025-10-01 04:37:14.783523', 'step': 14677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:14.838739', 'step': 14677, 'epoch': 2} {'type': 'loss', 'content': 0.07804521173238754, 'timestamp': '2025-10-01 04:37:14.840916', 'step': 14678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:14.895355', 'step': 14678, 'epoch': 2} {'type': 'loss', 'content': 0.060327667742967606, 'timestamp': '2025-10-01 04:37:14.897776', 'step': 14679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:37:14.955419', 'step': 14679, 'epoch': 2} {'type': 'loss', 'content': 0.12038841843605042, 'timestamp': '2025-10-01 04:37:14.962553', 'step': 14680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:15.016894', 'step': 14680, 'epoch': 2} {'type': 'loss', 'content': 0.09690573066473007, 'timestamp': '2025-10-01 04:37:15.019372', 'step': 14681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:15.074002', 'step': 14681, 'epoch': 2} {'type': 'loss', 'content': 0.09700950235128403, 'timestamp': '2025-10-01 04:37:15.077311', 'step': 14682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:15.131696', 'step': 14682, 'epoch': 2} {'type': 'loss', 'content': 0.10821467638015747, 'timestamp': '2025-10-01 04:37:15.134555', 'step': 14683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:37:15.187892', 'step': 14683, 'epoch': 2} {'type': 'loss', 'content': 0.07524675130844116, 'timestamp': '2025-10-01 04:37:15.194178', 'step': 14684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:15.247132', 'step': 14684, 'epoch': 2} {'type': 'loss', 'content': 0.1475219428539276, 'timestamp': '2025-10-01 04:37:15.249267', 'step': 14685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:15.301968', 'step': 14685, 'epoch': 2} {'type': 'loss', 'content': 0.16192488372325897, 'timestamp': '2025-10-01 04:37:15.304124', 'step': 14686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:15.356954', 'step': 14686, 'epoch': 2} {'type': 'loss', 'content': 0.08340679854154587, 'timestamp': '2025-10-01 04:37:15.359120', 'step': 14687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:15.411741', 'step': 14687, 'epoch': 2} {'type': 'loss', 'content': 0.059608206152915955, 'timestamp': '2025-10-01 04:37:15.417550', 'step': 14688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:37:15.470788', 'step': 14688, 'epoch': 2} {'type': 'loss', 'content': 0.09361889213323593, 'timestamp': '2025-10-01 04:37:15.474213', 'step': 14689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:15.527090', 'step': 14689, 'epoch': 2} {'type': 'loss', 'content': 0.10094767808914185, 'timestamp': '2025-10-01 04:37:15.529447', 'step': 14690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:15.583270', 'step': 14690, 'epoch': 2} {'type': 'loss', 'content': 0.1118866503238678, 'timestamp': '2025-10-01 04:37:15.596230', 'step': 14691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:15.649998', 'step': 14691, 'epoch': 2} {'type': 'loss', 'content': 0.06272443383932114, 'timestamp': '2025-10-01 04:37:15.655815', 'step': 14692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:15.708295', 'step': 14692, 'epoch': 2} {'type': 'loss', 'content': 0.06617450714111328, 'timestamp': '2025-10-01 04:37:15.722480', 'step': 14693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:15.780164', 'step': 14693, 'epoch': 2} {'type': 'loss', 'content': 0.08197728544473648, 'timestamp': '2025-10-01 04:37:15.782314', 'step': 14694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:15.835816', 'step': 14694, 'epoch': 2} {'type': 'loss', 'content': 0.03863360360264778, 'timestamp': '2025-10-01 04:37:15.837960', 'step': 14695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:15.890768', 'step': 14695, 'epoch': 2} {'type': 'loss', 'content': 0.08593469858169556, 'timestamp': '2025-10-01 04:37:15.896662', 'step': 14696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:15.949523', 'step': 14696, 'epoch': 2} {'type': 'loss', 'content': 0.0896521732211113, 'timestamp': '2025-10-01 04:37:15.951749', 'step': 14697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:16.005090', 'step': 14697, 'epoch': 2} {'type': 'loss', 'content': 0.04985464736819267, 'timestamp': '2025-10-01 04:37:16.007316', 'step': 14698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:16.060600', 'step': 14698, 'epoch': 2} {'type': 'loss', 'content': 0.11389713734388351, 'timestamp': '2025-10-01 04:37:16.063150', 'step': 14699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:16.122528', 'step': 14699, 'epoch': 2} {'type': 'loss', 'content': 0.15735770761966705, 'timestamp': '2025-10-01 04:37:16.128605', 'step': 14700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:16.181923', 'step': 14700, 'epoch': 2} {'type': 'loss', 'content': 0.08599089831113815, 'timestamp': '2025-10-01 04:37:16.184101', 'step': 14701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:16.237748', 'step': 14701, 'epoch': 2} {'type': 'loss', 'content': 0.055650707334280014, 'timestamp': '2025-10-01 04:37:16.239817', 'step': 14702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:16.293933', 'step': 14702, 'epoch': 2} {'type': 'loss', 'content': 0.17319954931735992, 'timestamp': '2025-10-01 04:37:16.296285', 'step': 14703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:16.350630', 'step': 14703, 'epoch': 2} {'type': 'loss', 'content': 0.07639450579881668, 'timestamp': '2025-10-01 04:37:16.356404', 'step': 14704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:16.408686', 'step': 14704, 'epoch': 2} {'type': 'loss', 'content': 0.13399411737918854, 'timestamp': '2025-10-01 04:37:16.410867', 'step': 14705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:16.464736', 'step': 14705, 'epoch': 2} {'type': 'loss', 'content': 0.11509405076503754, 'timestamp': '2025-10-01 04:37:16.468318', 'step': 14706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:16.524011', 'step': 14706, 'epoch': 2} {'type': 'loss', 'content': 0.07451195269823074, 'timestamp': '2025-10-01 04:37:16.526666', 'step': 14707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:16.587259', 'step': 14707, 'epoch': 2} {'type': 'loss', 'content': 0.17423322796821594, 'timestamp': '2025-10-01 04:37:16.592912', 'step': 14708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:37:16.646740', 'step': 14708, 'epoch': 2} {'type': 'loss', 'content': 0.08408746123313904, 'timestamp': '2025-10-01 04:37:16.649018', 'step': 14709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:16.703777', 'step': 14709, 'epoch': 2} {'type': 'loss', 'content': 0.0650445967912674, 'timestamp': '2025-10-01 04:37:16.706665', 'step': 14710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:16.760486', 'step': 14710, 'epoch': 2} {'type': 'loss', 'content': 0.09538441896438599, 'timestamp': '2025-10-01 04:37:16.762913', 'step': 14711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:16.815686', 'step': 14711, 'epoch': 2} {'type': 'loss', 'content': 0.09480833262205124, 'timestamp': '2025-10-01 04:37:16.822760', 'step': 14712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:16.876291', 'step': 14712, 'epoch': 2} {'type': 'loss', 'content': 0.14876492321491241, 'timestamp': '2025-10-01 04:37:16.878528', 'step': 14713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:37:16.932256', 'step': 14713, 'epoch': 2} {'type': 'loss', 'content': 0.1256246715784073, 'timestamp': '2025-10-01 04:37:16.935407', 'step': 14714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:16.990034', 'step': 14714, 'epoch': 2} {'type': 'loss', 'content': 0.07357857376337051, 'timestamp': '2025-10-01 04:37:16.993329', 'step': 14715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:17.047090', 'step': 14715, 'epoch': 2} {'type': 'loss', 'content': 0.14272882044315338, 'timestamp': '2025-10-01 04:37:17.052946', 'step': 14716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:17.106659', 'step': 14716, 'epoch': 2} {'type': 'loss', 'content': 0.07960967719554901, 'timestamp': '2025-10-01 04:37:17.109586', 'step': 14717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:17.162715', 'step': 14717, 'epoch': 2} {'type': 'loss', 'content': 0.10984742641448975, 'timestamp': '2025-10-01 04:37:17.164872', 'step': 14718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:17.217990', 'step': 14718, 'epoch': 2} {'type': 'loss', 'content': 0.1515258252620697, 'timestamp': '2025-10-01 04:37:17.220475', 'step': 14719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:17.274352', 'step': 14719, 'epoch': 2} {'type': 'loss', 'content': 0.16655471920967102, 'timestamp': '2025-10-01 04:37:17.280648', 'step': 14720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:17.334447', 'step': 14720, 'epoch': 2} {'type': 'loss', 'content': 0.19651608169078827, 'timestamp': '2025-10-01 04:37:17.336986', 'step': 14721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:17.402821', 'step': 14721, 'epoch': 2} {'type': 'loss', 'content': 0.07247289270162582, 'timestamp': '2025-10-01 04:37:17.404939', 'step': 14722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:17.458206', 'step': 14722, 'epoch': 2} {'type': 'loss', 'content': 0.08483987301588058, 'timestamp': '2025-10-01 04:37:17.460284', 'step': 14723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:17.513238', 'step': 14723, 'epoch': 2} {'type': 'loss', 'content': 0.08506431430578232, 'timestamp': '2025-10-01 04:37:17.518907', 'step': 14724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:17.571784', 'step': 14724, 'epoch': 2} {'type': 'loss', 'content': 0.20811055600643158, 'timestamp': '2025-10-01 04:37:17.574242', 'step': 14725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:17.628255', 'step': 14725, 'epoch': 2} {'type': 'loss', 'content': 0.13899926841259003, 'timestamp': '2025-10-01 04:37:17.630545', 'step': 14726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:17.683808', 'step': 14726, 'epoch': 2} {'type': 'loss', 'content': 0.04280520975589752, 'timestamp': '2025-10-01 04:37:17.685890', 'step': 14727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:17.739025', 'step': 14727, 'epoch': 2} {'type': 'loss', 'content': 0.10886692255735397, 'timestamp': '2025-10-01 04:37:17.744845', 'step': 14728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:17.797677', 'step': 14728, 'epoch': 2} {'type': 'loss', 'content': 0.14294083416461945, 'timestamp': '2025-10-01 04:37:17.799829', 'step': 14729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:17.853580', 'step': 14729, 'epoch': 2} {'type': 'loss', 'content': 0.11101813614368439, 'timestamp': '2025-10-01 04:37:17.856305', 'step': 14730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:17.918602', 'step': 14730, 'epoch': 2} {'type': 'loss', 'content': 0.16582424938678741, 'timestamp': '2025-10-01 04:37:17.920732', 'step': 14731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:17.975427', 'step': 14731, 'epoch': 2} {'type': 'loss', 'content': 0.11853233724832535, 'timestamp': '2025-10-01 04:37:17.982817', 'step': 14732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:18.035624', 'step': 14732, 'epoch': 2} {'type': 'loss', 'content': 0.1130765751004219, 'timestamp': '2025-10-01 04:37:18.039000', 'step': 14733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:18.092355', 'step': 14733, 'epoch': 2} {'type': 'loss', 'content': 0.11014678329229355, 'timestamp': '2025-10-01 04:37:18.095057', 'step': 14734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:18.149027', 'step': 14734, 'epoch': 2} {'type': 'loss', 'content': 0.0656593069434166, 'timestamp': '2025-10-01 04:37:18.151625', 'step': 14735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:18.220036', 'step': 14735, 'epoch': 2} {'type': 'loss', 'content': 0.14274930953979492, 'timestamp': '2025-10-01 04:37:18.225856', 'step': 14736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:18.281833', 'step': 14736, 'epoch': 2} {'type': 'loss', 'content': 0.09806354343891144, 'timestamp': '2025-10-01 04:37:18.284022', 'step': 14737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:18.338863', 'step': 14737, 'epoch': 2} {'type': 'loss', 'content': 0.17459645867347717, 'timestamp': '2025-10-01 04:37:18.341009', 'step': 14738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:18.397004', 'step': 14738, 'epoch': 2} {'type': 'loss', 'content': 0.0707727000117302, 'timestamp': '2025-10-01 04:37:18.399791', 'step': 14739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:18.453160', 'step': 14739, 'epoch': 2} {'type': 'loss', 'content': 0.12774401903152466, 'timestamp': '2025-10-01 04:37:18.459015', 'step': 14740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:18.512794', 'step': 14740, 'epoch': 2} {'type': 'loss', 'content': 0.21677349507808685, 'timestamp': '2025-10-01 04:37:18.515187', 'step': 14741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:18.570157', 'step': 14741, 'epoch': 2} {'type': 'loss', 'content': 0.07269545644521713, 'timestamp': '2025-10-01 04:37:18.572710', 'step': 14742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:18.628237', 'step': 14742, 'epoch': 2} {'type': 'loss', 'content': 0.12858447432518005, 'timestamp': '2025-10-01 04:37:18.632069', 'step': 14743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:18.688001', 'step': 14743, 'epoch': 2} {'type': 'loss', 'content': 0.013920944184064865, 'timestamp': '2025-10-01 04:37:18.694258', 'step': 14744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:18.746699', 'step': 14744, 'epoch': 2} {'type': 'loss', 'content': 0.04761260747909546, 'timestamp': '2025-10-01 04:37:18.748976', 'step': 14745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:37:18.802283', 'step': 14745, 'epoch': 2} {'type': 'loss', 'content': 0.11145486682653427, 'timestamp': '2025-10-01 04:37:18.805015', 'step': 14746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:18.858154', 'step': 14746, 'epoch': 2} {'type': 'loss', 'content': 0.09071391075849533, 'timestamp': '2025-10-01 04:37:18.860588', 'step': 14747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:18.913270', 'step': 14747, 'epoch': 2} {'type': 'loss', 'content': 0.10899809747934341, 'timestamp': '2025-10-01 04:37:18.920139', 'step': 14748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:18.973703', 'step': 14748, 'epoch': 2} {'type': 'loss', 'content': 0.059016965329647064, 'timestamp': '2025-10-01 04:37:18.976812', 'step': 14749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:19.030692', 'step': 14749, 'epoch': 2} {'type': 'loss', 'content': 0.14657650887966156, 'timestamp': '2025-10-01 04:37:19.033166', 'step': 14750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:19.086538', 'step': 14750, 'epoch': 2} {'type': 'loss', 'content': 0.09461083263158798, 'timestamp': '2025-10-01 04:37:19.088706', 'step': 14751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:19.142318', 'step': 14751, 'epoch': 2} {'type': 'loss', 'content': 0.14302408695220947, 'timestamp': '2025-10-01 04:37:19.149726', 'step': 14752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:19.202955', 'step': 14752, 'epoch': 2} {'type': 'loss', 'content': 0.11997978389263153, 'timestamp': '2025-10-01 04:37:19.205108', 'step': 14753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:19.258542', 'step': 14753, 'epoch': 2} {'type': 'loss', 'content': 0.09199763089418411, 'timestamp': '2025-10-01 04:37:19.260933', 'step': 14754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:19.314344', 'step': 14754, 'epoch': 2} {'type': 'loss', 'content': 0.1288965940475464, 'timestamp': '2025-10-01 04:37:19.316708', 'step': 14755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:19.369893', 'step': 14755, 'epoch': 2} {'type': 'loss', 'content': 0.07765758037567139, 'timestamp': '2025-10-01 04:37:19.375574', 'step': 14756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:19.429472', 'step': 14756, 'epoch': 2} {'type': 'loss', 'content': 0.09991443157196045, 'timestamp': '2025-10-01 04:37:19.431564', 'step': 14757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:19.485468', 'step': 14757, 'epoch': 2} {'type': 'loss', 'content': 0.1484934687614441, 'timestamp': '2025-10-01 04:37:19.487600', 'step': 14758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:19.546358', 'step': 14758, 'epoch': 2} {'type': 'loss', 'content': 0.05327930673956871, 'timestamp': '2025-10-01 04:37:19.548594', 'step': 14759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:19.601244', 'step': 14759, 'epoch': 2} {'type': 'loss', 'content': 0.08225042372941971, 'timestamp': '2025-10-01 04:37:19.607044', 'step': 14760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:19.663424', 'step': 14760, 'epoch': 2} {'type': 'loss', 'content': 0.12047459930181503, 'timestamp': '2025-10-01 04:37:19.666695', 'step': 14761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:19.719422', 'step': 14761, 'epoch': 2} {'type': 'loss', 'content': 0.13703155517578125, 'timestamp': '2025-10-01 04:37:19.721636', 'step': 14762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:19.776496', 'step': 14762, 'epoch': 2} {'type': 'loss', 'content': 0.11895228922367096, 'timestamp': '2025-10-01 04:37:19.779148', 'step': 14763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:19.832027', 'step': 14763, 'epoch': 2} {'type': 'loss', 'content': 0.062395427376031876, 'timestamp': '2025-10-01 04:37:19.837756', 'step': 14764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:19.892572', 'step': 14764, 'epoch': 2} {'type': 'loss', 'content': 0.10426042973995209, 'timestamp': '2025-10-01 04:37:19.894770', 'step': 14765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:19.948144', 'step': 14765, 'epoch': 2} {'type': 'loss', 'content': 0.1318528950214386, 'timestamp': '2025-10-01 04:37:19.950323', 'step': 14766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:20.003659', 'step': 14766, 'epoch': 2} {'type': 'loss', 'content': 0.09600066393613815, 'timestamp': '2025-10-01 04:37:20.007661', 'step': 14767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:20.062433', 'step': 14767, 'epoch': 2} {'type': 'loss', 'content': 0.1437758356332779, 'timestamp': '2025-10-01 04:37:20.068282', 'step': 14768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:20.120660', 'step': 14768, 'epoch': 2} {'type': 'loss', 'content': 0.14552836120128632, 'timestamp': '2025-10-01 04:37:20.123030', 'step': 14769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:20.189428', 'step': 14769, 'epoch': 2} {'type': 'loss', 'content': 0.1661708801984787, 'timestamp': '2025-10-01 04:37:20.192319', 'step': 14770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:20.247455', 'step': 14770, 'epoch': 2} {'type': 'loss', 'content': 0.10813596844673157, 'timestamp': '2025-10-01 04:37:20.250405', 'step': 14771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:20.303169', 'step': 14771, 'epoch': 2} {'type': 'loss', 'content': 0.13583865761756897, 'timestamp': '2025-10-01 04:37:20.308887', 'step': 14772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:37:20.362122', 'step': 14772, 'epoch': 2} {'type': 'loss', 'content': 0.1605316698551178, 'timestamp': '2025-10-01 04:37:20.364356', 'step': 14773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:20.417235', 'step': 14773, 'epoch': 2} {'type': 'loss', 'content': 0.15148307383060455, 'timestamp': '2025-10-01 04:37:20.422243', 'step': 14774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:20.475476', 'step': 14774, 'epoch': 2} {'type': 'loss', 'content': 0.14286905527114868, 'timestamp': '2025-10-01 04:37:20.477778', 'step': 14775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:20.531649', 'step': 14775, 'epoch': 2} {'type': 'loss', 'content': 0.09029428660869598, 'timestamp': '2025-10-01 04:37:20.538128', 'step': 14776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:20.593763', 'step': 14776, 'epoch': 2} {'type': 'loss', 'content': 0.14435769617557526, 'timestamp': '2025-10-01 04:37:20.595952', 'step': 14777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:20.653830', 'step': 14777, 'epoch': 2} {'type': 'loss', 'content': 0.14973865449428558, 'timestamp': '2025-10-01 04:37:20.656490', 'step': 14778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:20.720873', 'step': 14778, 'epoch': 2} {'type': 'loss', 'content': 0.09255509823560715, 'timestamp': '2025-10-01 04:37:20.724841', 'step': 14779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:20.784027', 'step': 14779, 'epoch': 2} {'type': 'loss', 'content': 0.14724819362163544, 'timestamp': '2025-10-01 04:37:20.789993', 'step': 14780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:20.843268', 'step': 14780, 'epoch': 2} {'type': 'loss', 'content': 0.24997632205486298, 'timestamp': '2025-10-01 04:37:20.845518', 'step': 14781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:20.902026', 'step': 14781, 'epoch': 2} {'type': 'loss', 'content': 0.13502442836761475, 'timestamp': '2025-10-01 04:37:20.904508', 'step': 14782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:20.958273', 'step': 14782, 'epoch': 2} {'type': 'loss', 'content': 0.10837294906377792, 'timestamp': '2025-10-01 04:37:20.962941', 'step': 14783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:21.017728', 'step': 14783, 'epoch': 2} {'type': 'loss', 'content': 0.05912959948182106, 'timestamp': '2025-10-01 04:37:21.023675', 'step': 14784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:21.077525', 'step': 14784, 'epoch': 2} {'type': 'loss', 'content': 0.09554112702608109, 'timestamp': '2025-10-01 04:37:21.084728', 'step': 14785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:21.138288', 'step': 14785, 'epoch': 2} {'type': 'loss', 'content': 0.14002349972724915, 'timestamp': '2025-10-01 04:37:21.140934', 'step': 14786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:21.198879', 'step': 14786, 'epoch': 2} {'type': 'loss', 'content': 0.08722884953022003, 'timestamp': '2025-10-01 04:37:21.203581', 'step': 14787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:21.259337', 'step': 14787, 'epoch': 2} {'type': 'loss', 'content': 0.10199309140443802, 'timestamp': '2025-10-01 04:37:21.266090', 'step': 14788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:21.319004', 'step': 14788, 'epoch': 2} {'type': 'loss', 'content': 0.09222384542226791, 'timestamp': '2025-10-01 04:37:21.321434', 'step': 14789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:21.375921', 'step': 14789, 'epoch': 2} {'type': 'loss', 'content': 0.16978196799755096, 'timestamp': '2025-10-01 04:37:21.378116', 'step': 14790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:21.432860', 'step': 14790, 'epoch': 2} {'type': 'loss', 'content': 0.09759427607059479, 'timestamp': '2025-10-01 04:37:21.435178', 'step': 14791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:21.488269', 'step': 14791, 'epoch': 2} {'type': 'loss', 'content': 0.12996776401996613, 'timestamp': '2025-10-01 04:37:21.494069', 'step': 14792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:21.549037', 'step': 14792, 'epoch': 2} {'type': 'loss', 'content': 0.10749898850917816, 'timestamp': '2025-10-01 04:37:21.551279', 'step': 14793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:21.609121', 'step': 14793, 'epoch': 2} {'type': 'loss', 'content': 0.06024913862347603, 'timestamp': '2025-10-01 04:37:21.611890', 'step': 14794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:21.666025', 'step': 14794, 'epoch': 2} {'type': 'loss', 'content': 0.09362945705652237, 'timestamp': '2025-10-01 04:37:21.668985', 'step': 14795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:21.723262', 'step': 14795, 'epoch': 2} {'type': 'loss', 'content': 0.13569702208042145, 'timestamp': '2025-10-01 04:37:21.730151', 'step': 14796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:21.795816', 'step': 14796, 'epoch': 2} {'type': 'loss', 'content': 0.18525926768779755, 'timestamp': '2025-10-01 04:37:21.798148', 'step': 14797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:21.851711', 'step': 14797, 'epoch': 2} {'type': 'loss', 'content': 0.06576268374919891, 'timestamp': '2025-10-01 04:37:21.854075', 'step': 14798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:37:21.928056', 'step': 14798, 'epoch': 2} {'type': 'loss', 'content': 0.1696949303150177, 'timestamp': '2025-10-01 04:37:21.930652', 'step': 14799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:21.985526', 'step': 14799, 'epoch': 2} {'type': 'loss', 'content': 0.11279261857271194, 'timestamp': '2025-10-01 04:37:21.991569', 'step': 14800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:22.058732', 'step': 14800, 'epoch': 2} {'type': 'loss', 'content': 0.15517014265060425, 'timestamp': '2025-10-01 04:37:22.060955', 'step': 14801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:22.120722', 'step': 14801, 'epoch': 2} {'type': 'loss', 'content': 0.1380135715007782, 'timestamp': '2025-10-01 04:37:22.140451', 'step': 14802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:22.200911', 'step': 14802, 'epoch': 2} {'type': 'loss', 'content': 0.062269166111946106, 'timestamp': '2025-10-01 04:37:22.203737', 'step': 14803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:22.256414', 'step': 14803, 'epoch': 2} {'type': 'loss', 'content': 0.19548630714416504, 'timestamp': '2025-10-01 04:37:22.262785', 'step': 14804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:22.316107', 'step': 14804, 'epoch': 2} {'type': 'loss', 'content': 0.10817652195692062, 'timestamp': '2025-10-01 04:37:22.318378', 'step': 14805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:22.371505', 'step': 14805, 'epoch': 2} {'type': 'loss', 'content': 0.13434025645256042, 'timestamp': '2025-10-01 04:37:22.373926', 'step': 14806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:22.427327', 'step': 14806, 'epoch': 2} {'type': 'loss', 'content': 0.09513969719409943, 'timestamp': '2025-10-01 04:37:22.429608', 'step': 14807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:22.482440', 'step': 14807, 'epoch': 2} {'type': 'loss', 'content': 0.09740769118070602, 'timestamp': '2025-10-01 04:37:22.489340', 'step': 14808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:22.543484', 'step': 14808, 'epoch': 2} {'type': 'loss', 'content': 0.045630112290382385, 'timestamp': '2025-10-01 04:37:22.546098', 'step': 14809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:22.600323', 'step': 14809, 'epoch': 2} {'type': 'loss', 'content': 0.08123461157083511, 'timestamp': '2025-10-01 04:37:22.603006', 'step': 14810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:22.673235', 'step': 14810, 'epoch': 2} {'type': 'loss', 'content': 0.09525035321712494, 'timestamp': '2025-10-01 04:37:22.678080', 'step': 14811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:22.733431', 'step': 14811, 'epoch': 2} {'type': 'loss', 'content': 0.12742792069911957, 'timestamp': '2025-10-01 04:37:22.739589', 'step': 14812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:22.794101', 'step': 14812, 'epoch': 2} {'type': 'loss', 'content': 0.11748699098825455, 'timestamp': '2025-10-01 04:37:22.796897', 'step': 14813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:22.852696', 'step': 14813, 'epoch': 2} {'type': 'loss', 'content': 0.1217920333147049, 'timestamp': '2025-10-01 04:37:22.855764', 'step': 14814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:22.910426', 'step': 14814, 'epoch': 2} {'type': 'loss', 'content': 0.15027309954166412, 'timestamp': '2025-10-01 04:37:22.913212', 'step': 14815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:37:22.967446', 'step': 14815, 'epoch': 2} {'type': 'loss', 'content': 0.08784618228673935, 'timestamp': '2025-10-01 04:37:22.974146', 'step': 14816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:23.028139', 'step': 14816, 'epoch': 2} {'type': 'loss', 'content': 0.18589691817760468, 'timestamp': '2025-10-01 04:37:23.031072', 'step': 14817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:23.086408', 'step': 14817, 'epoch': 2} {'type': 'loss', 'content': 0.07232962548732758, 'timestamp': '2025-10-01 04:37:23.089415', 'step': 14818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:23.144166', 'step': 14818, 'epoch': 2} {'type': 'loss', 'content': 0.12211690098047256, 'timestamp': '2025-10-01 04:37:23.146659', 'step': 14819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:23.201226', 'step': 14819, 'epoch': 2} {'type': 'loss', 'content': 0.09462597221136093, 'timestamp': '2025-10-01 04:37:23.207720', 'step': 14820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:23.265544', 'step': 14820, 'epoch': 2} {'type': 'loss', 'content': 0.13513970375061035, 'timestamp': '2025-10-01 04:37:23.268231', 'step': 14821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:23.323924', 'step': 14821, 'epoch': 2} {'type': 'loss', 'content': 0.09673485904932022, 'timestamp': '2025-10-01 04:37:23.326478', 'step': 14822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:23.381637', 'step': 14822, 'epoch': 2} {'type': 'loss', 'content': 0.05333342403173447, 'timestamp': '2025-10-01 04:37:23.384093', 'step': 14823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:23.438602', 'step': 14823, 'epoch': 2} {'type': 'loss', 'content': 0.13919147849082947, 'timestamp': '2025-10-01 04:37:23.444819', 'step': 14824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:23.498541', 'step': 14824, 'epoch': 2} {'type': 'loss', 'content': 0.181654691696167, 'timestamp': '2025-10-01 04:37:23.501140', 'step': 14825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:23.555436', 'step': 14825, 'epoch': 2} {'type': 'loss', 'content': 0.0526982843875885, 'timestamp': '2025-10-01 04:37:23.558262', 'step': 14826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:23.612273', 'step': 14826, 'epoch': 2} {'type': 'loss', 'content': 0.043898601084947586, 'timestamp': '2025-10-01 04:37:23.615598', 'step': 14827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:23.670505', 'step': 14827, 'epoch': 2} {'type': 'loss', 'content': 0.16816236078739166, 'timestamp': '2025-10-01 04:37:23.676832', 'step': 14828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:23.730825', 'step': 14828, 'epoch': 2} {'type': 'loss', 'content': 0.09394814074039459, 'timestamp': '2025-10-01 04:37:23.733707', 'step': 14829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:23.788377', 'step': 14829, 'epoch': 2} {'type': 'loss', 'content': 0.1240622028708458, 'timestamp': '2025-10-01 04:37:23.791481', 'step': 14830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:37:23.846538', 'step': 14830, 'epoch': 2} {'type': 'loss', 'content': 0.10069283843040466, 'timestamp': '2025-10-01 04:37:23.849247', 'step': 14831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:23.903608', 'step': 14831, 'epoch': 2} {'type': 'loss', 'content': 0.13091044127941132, 'timestamp': '2025-10-01 04:37:23.909645', 'step': 14832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:23.962923', 'step': 14832, 'epoch': 2} {'type': 'loss', 'content': 0.14492109417915344, 'timestamp': '2025-10-01 04:37:23.965338', 'step': 14833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:24.020437', 'step': 14833, 'epoch': 2} {'type': 'loss', 'content': 0.10653392970561981, 'timestamp': '2025-10-01 04:37:24.022991', 'step': 14834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:24.077393', 'step': 14834, 'epoch': 2} {'type': 'loss', 'content': 0.11473221331834793, 'timestamp': '2025-10-01 04:37:24.079872', 'step': 14835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:24.136619', 'step': 14835, 'epoch': 2} {'type': 'loss', 'content': 0.19026491045951843, 'timestamp': '2025-10-01 04:37:24.143361', 'step': 14836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:24.198625', 'step': 14836, 'epoch': 2} {'type': 'loss', 'content': 0.1355125904083252, 'timestamp': '2025-10-01 04:37:24.201009', 'step': 14837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:24.254981', 'step': 14837, 'epoch': 2} {'type': 'loss', 'content': 0.06204597279429436, 'timestamp': '2025-10-01 04:37:24.257886', 'step': 14838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:24.312859', 'step': 14838, 'epoch': 2} {'type': 'loss', 'content': 0.07571569085121155, 'timestamp': '2025-10-01 04:37:24.315493', 'step': 14839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:24.369200', 'step': 14839, 'epoch': 2} {'type': 'loss', 'content': 0.14372238516807556, 'timestamp': '2025-10-01 04:37:24.375391', 'step': 14840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:24.429456', 'step': 14840, 'epoch': 2} {'type': 'loss', 'content': 0.17771096527576447, 'timestamp': '2025-10-01 04:37:24.431901', 'step': 14841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:24.485596', 'step': 14841, 'epoch': 2} {'type': 'loss', 'content': 0.12911652028560638, 'timestamp': '2025-10-01 04:37:24.487981', 'step': 14842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:24.541744', 'step': 14842, 'epoch': 2} {'type': 'loss', 'content': 0.11941501498222351, 'timestamp': '2025-10-01 04:37:24.544116', 'step': 14843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:24.598373', 'step': 14843, 'epoch': 2} {'type': 'loss', 'content': 0.10062114149332047, 'timestamp': '2025-10-01 04:37:24.604984', 'step': 14844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:24.669730', 'step': 14844, 'epoch': 2} {'type': 'loss', 'content': 0.1892593652009964, 'timestamp': '2025-10-01 04:37:24.671939', 'step': 14845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:24.725106', 'step': 14845, 'epoch': 2} {'type': 'loss', 'content': 0.08752501755952835, 'timestamp': '2025-10-01 04:37:24.727407', 'step': 14846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:24.781192', 'step': 14846, 'epoch': 2} {'type': 'loss', 'content': 0.07976900041103363, 'timestamp': '2025-10-01 04:37:24.783571', 'step': 14847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:24.837266', 'step': 14847, 'epoch': 2} {'type': 'loss', 'content': 0.07788252830505371, 'timestamp': '2025-10-01 04:37:24.843323', 'step': 14848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:24.896284', 'step': 14848, 'epoch': 2} {'type': 'loss', 'content': 0.17426545917987823, 'timestamp': '2025-10-01 04:37:24.898692', 'step': 14849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:24.952232', 'step': 14849, 'epoch': 2} {'type': 'loss', 'content': 0.08795275539159775, 'timestamp': '2025-10-01 04:37:24.954643', 'step': 14850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:25.009927', 'step': 14850, 'epoch': 2} {'type': 'loss', 'content': 0.18593886494636536, 'timestamp': '2025-10-01 04:37:25.012203', 'step': 14851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:25.066379', 'step': 14851, 'epoch': 2} {'type': 'loss', 'content': 0.12614856660366058, 'timestamp': '2025-10-01 04:37:25.072588', 'step': 14852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:25.125863', 'step': 14852, 'epoch': 2} {'type': 'loss', 'content': 0.12860774993896484, 'timestamp': '2025-10-01 04:37:25.128403', 'step': 14853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:25.182333', 'step': 14853, 'epoch': 2} {'type': 'loss', 'content': 0.13764703273773193, 'timestamp': '2025-10-01 04:37:25.185154', 'step': 14854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:25.239180', 'step': 14854, 'epoch': 2} {'type': 'loss', 'content': 0.08029474318027496, 'timestamp': '2025-10-01 04:37:25.241798', 'step': 14855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:25.298904', 'step': 14855, 'epoch': 2} {'type': 'loss', 'content': 0.0935167744755745, 'timestamp': '2025-10-01 04:37:25.304985', 'step': 14856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:37:25.358463', 'step': 14856, 'epoch': 2} {'type': 'loss', 'content': 0.09641191363334656, 'timestamp': '2025-10-01 04:37:25.360892', 'step': 14857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:25.414813', 'step': 14857, 'epoch': 2} {'type': 'loss', 'content': 0.0930573120713234, 'timestamp': '2025-10-01 04:37:25.417346', 'step': 14858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:25.471789', 'step': 14858, 'epoch': 2} {'type': 'loss', 'content': 0.0839083194732666, 'timestamp': '2025-10-01 04:37:25.474662', 'step': 14859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:25.528415', 'step': 14859, 'epoch': 2} {'type': 'loss', 'content': 0.08257408440113068, 'timestamp': '2025-10-01 04:37:25.534407', 'step': 14860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:25.588556', 'step': 14860, 'epoch': 2} {'type': 'loss', 'content': 0.1714904010295868, 'timestamp': '2025-10-01 04:37:25.590944', 'step': 14861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:25.644819', 'step': 14861, 'epoch': 2} {'type': 'loss', 'content': 0.12061591446399689, 'timestamp': '2025-10-01 04:37:25.647256', 'step': 14862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:25.701561', 'step': 14862, 'epoch': 2} {'type': 'loss', 'content': 0.12767191231250763, 'timestamp': '2025-10-01 04:37:25.703991', 'step': 14863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:25.757728', 'step': 14863, 'epoch': 2} {'type': 'loss', 'content': 0.14587189257144928, 'timestamp': '2025-10-01 04:37:25.763679', 'step': 14864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:25.817184', 'step': 14864, 'epoch': 2} {'type': 'loss', 'content': 0.09898323565721512, 'timestamp': '2025-10-01 04:37:25.819687', 'step': 14865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:25.878380', 'step': 14865, 'epoch': 2} {'type': 'loss', 'content': 0.10321088880300522, 'timestamp': '2025-10-01 04:37:25.881574', 'step': 14866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:25.935552', 'step': 14866, 'epoch': 2} {'type': 'loss', 'content': 0.11956179887056351, 'timestamp': '2025-10-01 04:37:25.937907', 'step': 14867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:25.992569', 'step': 14867, 'epoch': 2} {'type': 'loss', 'content': 0.2537660002708435, 'timestamp': '2025-10-01 04:37:25.998685', 'step': 14868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:26.055686', 'step': 14868, 'epoch': 2} {'type': 'loss', 'content': 0.19132037460803986, 'timestamp': '2025-10-01 04:37:26.058409', 'step': 14869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:26.113847', 'step': 14869, 'epoch': 2} {'type': 'loss', 'content': 0.06605575978755951, 'timestamp': '2025-10-01 04:37:26.116388', 'step': 14870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:26.170251', 'step': 14870, 'epoch': 2} {'type': 'loss', 'content': 0.05726174637675285, 'timestamp': '2025-10-01 04:37:26.172573', 'step': 14871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:26.227312', 'step': 14871, 'epoch': 2} {'type': 'loss', 'content': 0.08238951116800308, 'timestamp': '2025-10-01 04:37:26.236706', 'step': 14872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:37:26.296078', 'step': 14872, 'epoch': 2} {'type': 'loss', 'content': 0.1274353712797165, 'timestamp': '2025-10-01 04:37:26.298542', 'step': 14873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:26.354071', 'step': 14873, 'epoch': 2} {'type': 'loss', 'content': 0.05941210314631462, 'timestamp': '2025-10-01 04:37:26.372222', 'step': 14874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:26.429428', 'step': 14874, 'epoch': 2} {'type': 'loss', 'content': 0.04757716879248619, 'timestamp': '2025-10-01 04:37:26.432472', 'step': 14875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:26.491065', 'step': 14875, 'epoch': 2} {'type': 'loss', 'content': 0.06830254197120667, 'timestamp': '2025-10-01 04:37:26.497411', 'step': 14876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:37:26.553342', 'step': 14876, 'epoch': 2} {'type': 'loss', 'content': 0.1476633995771408, 'timestamp': '2025-10-01 04:37:26.557001', 'step': 14877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:26.611205', 'step': 14877, 'epoch': 2} {'type': 'loss', 'content': 0.052529387176036835, 'timestamp': '2025-10-01 04:37:26.615223', 'step': 14878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:26.671522', 'step': 14878, 'epoch': 2} {'type': 'loss', 'content': 0.10570047795772552, 'timestamp': '2025-10-01 04:37:26.678109', 'step': 14879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:26.732826', 'step': 14879, 'epoch': 2} {'type': 'loss', 'content': 0.12398362904787064, 'timestamp': '2025-10-01 04:37:26.739472', 'step': 14880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:26.794468', 'step': 14880, 'epoch': 2} {'type': 'loss', 'content': 0.032536957412958145, 'timestamp': '2025-10-01 04:37:26.796941', 'step': 14881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:26.851544', 'step': 14881, 'epoch': 2} {'type': 'loss', 'content': 0.13547587394714355, 'timestamp': '2025-10-01 04:37:26.854368', 'step': 14882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:26.911659', 'step': 14882, 'epoch': 2} {'type': 'loss', 'content': 0.1365433782339096, 'timestamp': '2025-10-01 04:37:26.914145', 'step': 14883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:26.969847', 'step': 14883, 'epoch': 2} {'type': 'loss', 'content': 0.07160872966051102, 'timestamp': '2025-10-01 04:37:26.978653', 'step': 14884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:27.032032', 'step': 14884, 'epoch': 2} {'type': 'loss', 'content': 0.1815304011106491, 'timestamp': '2025-10-01 04:37:27.034541', 'step': 14885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:27.088899', 'step': 14885, 'epoch': 2} {'type': 'loss', 'content': 0.08782626688480377, 'timestamp': '2025-10-01 04:37:27.091506', 'step': 14886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:27.147038', 'step': 14886, 'epoch': 2} {'type': 'loss', 'content': 0.08378294110298157, 'timestamp': '2025-10-01 04:37:27.149539', 'step': 14887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:27.203678', 'step': 14887, 'epoch': 2} {'type': 'loss', 'content': 0.05730316415429115, 'timestamp': '2025-10-01 04:37:27.210140', 'step': 14888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:37:27.266190', 'step': 14888, 'epoch': 2} {'type': 'loss', 'content': 0.1410498470067978, 'timestamp': '2025-10-01 04:37:27.270216', 'step': 14889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:27.325057', 'step': 14889, 'epoch': 2} {'type': 'loss', 'content': 0.10614868253469467, 'timestamp': '2025-10-01 04:37:27.327430', 'step': 14890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:27.385271', 'step': 14890, 'epoch': 2} {'type': 'loss', 'content': 0.179651141166687, 'timestamp': '2025-10-01 04:37:27.389168', 'step': 14891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:27.447478', 'step': 14891, 'epoch': 2} {'type': 'loss', 'content': 0.1652565896511078, 'timestamp': '2025-10-01 04:37:27.453633', 'step': 14892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:27.522601', 'step': 14892, 'epoch': 2} {'type': 'loss', 'content': 0.07604239135980606, 'timestamp': '2025-10-01 04:37:27.525009', 'step': 14893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:27.579748', 'step': 14893, 'epoch': 2} {'type': 'loss', 'content': 0.1076756939291954, 'timestamp': '2025-10-01 04:37:27.582426', 'step': 14894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:27.636647', 'step': 14894, 'epoch': 2} {'type': 'loss', 'content': 0.1018858477473259, 'timestamp': '2025-10-01 04:37:27.639009', 'step': 14895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:27.695143', 'step': 14895, 'epoch': 2} {'type': 'loss', 'content': 0.07450717687606812, 'timestamp': '2025-10-01 04:37:27.701238', 'step': 14896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:27.754981', 'step': 14896, 'epoch': 2} {'type': 'loss', 'content': 0.14252978563308716, 'timestamp': '2025-10-01 04:37:27.757615', 'step': 14897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:27.812037', 'step': 14897, 'epoch': 2} {'type': 'loss', 'content': 0.04062386229634285, 'timestamp': '2025-10-01 04:37:27.814845', 'step': 14898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:27.869214', 'step': 14898, 'epoch': 2} {'type': 'loss', 'content': 0.09182402491569519, 'timestamp': '2025-10-01 04:37:27.871706', 'step': 14899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:27.926618', 'step': 14899, 'epoch': 2} {'type': 'loss', 'content': 0.16836506128311157, 'timestamp': '2025-10-01 04:37:27.933259', 'step': 14900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:27.987091', 'step': 14900, 'epoch': 2} {'type': 'loss', 'content': 0.10360338538885117, 'timestamp': '2025-10-01 04:37:27.989576', 'step': 14901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:28.046231', 'step': 14901, 'epoch': 2} {'type': 'loss', 'content': 0.1176285520195961, 'timestamp': '2025-10-01 04:37:28.048688', 'step': 14902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:37:28.106246', 'step': 14902, 'epoch': 2} {'type': 'loss', 'content': 0.18215453624725342, 'timestamp': '2025-10-01 04:37:28.108961', 'step': 14903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:28.163491', 'step': 14903, 'epoch': 2} {'type': 'loss', 'content': 0.0538799911737442, 'timestamp': '2025-10-01 04:37:28.170491', 'step': 14904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:28.224905', 'step': 14904, 'epoch': 2} {'type': 'loss', 'content': 0.08548454940319061, 'timestamp': '2025-10-01 04:37:28.227893', 'step': 14905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:28.282475', 'step': 14905, 'epoch': 2} {'type': 'loss', 'content': 0.09789258241653442, 'timestamp': '2025-10-01 04:37:28.285043', 'step': 14906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:28.340425', 'step': 14906, 'epoch': 2} {'type': 'loss', 'content': 0.1743842512369156, 'timestamp': '2025-10-01 04:37:28.342965', 'step': 14907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:28.397146', 'step': 14907, 'epoch': 2} {'type': 'loss', 'content': 0.15584325790405273, 'timestamp': '2025-10-01 04:37:28.403766', 'step': 14908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:37:28.458317', 'step': 14908, 'epoch': 2} {'type': 'loss', 'content': 0.10852882266044617, 'timestamp': '2025-10-01 04:37:28.460785', 'step': 14909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:28.515661', 'step': 14909, 'epoch': 2} {'type': 'loss', 'content': 0.09459659457206726, 'timestamp': '2025-10-01 04:37:28.518152', 'step': 14910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:28.572247', 'step': 14910, 'epoch': 2} {'type': 'loss', 'content': 0.13687537610530853, 'timestamp': '2025-10-01 04:37:28.574829', 'step': 14911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:28.644356', 'step': 14911, 'epoch': 2} {'type': 'loss', 'content': 0.04514654353260994, 'timestamp': '2025-10-01 04:37:28.650705', 'step': 14912, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-01 04:37:42.564861', 'step': 14912, 'epoch': 2} {'type': 'pplx', 'content': 9633.064045752726, 'timestamp': '2025-10-01 04:37:42.570640', 'step': 14912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:42.625469', 'step': 14912, 'epoch': 2} {'type': 'loss', 'content': 0.0929737389087677, 'timestamp': '2025-10-01 04:37:42.628018', 'step': 14913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:42.683214', 'step': 14913, 'epoch': 2} {'type': 'loss', 'content': 0.053674280643463135, 'timestamp': '2025-10-01 04:37:42.685795', 'step': 14914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:37:42.739736', 'step': 14914, 'epoch': 2} {'type': 'loss', 'content': 0.33407077193260193, 'timestamp': '2025-10-01 04:37:42.742701', 'step': 14915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:42.796089', 'step': 14915, 'epoch': 2} {'type': 'loss', 'content': 0.11693040281534195, 'timestamp': '2025-10-01 04:37:42.816537', 'step': 14916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:42.870850', 'step': 14916, 'epoch': 2} {'type': 'loss', 'content': 0.1026022881269455, 'timestamp': '2025-10-01 04:37:42.873238', 'step': 14917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:42.926756', 'step': 14917, 'epoch': 2} {'type': 'loss', 'content': 0.07911137491464615, 'timestamp': '2025-10-01 04:37:42.929228', 'step': 14918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:42.983295', 'step': 14918, 'epoch': 2} {'type': 'loss', 'content': 0.05814283713698387, 'timestamp': '2025-10-01 04:37:42.985854', 'step': 14919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [1, 208], 'flops': 1040006410960.0}, 'timestamp': '2025-10-01 04:37:43.055111', 'step': 14919, 'epoch': 2} {'type': 'loss', 'content': 0.26873213052749634, 'timestamp': '2025-10-01 04:37:43.062113', 'step': 14920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:43.117485', 'step': 14920, 'epoch': 3} {'type': 'loss', 'content': 0.06173372641205788, 'timestamp': '2025-10-01 04:37:43.121268', 'step': 14921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:43.176999', 'step': 14921, 'epoch': 3} {'type': 'loss', 'content': 0.09535127878189087, 'timestamp': '2025-10-01 04:37:43.179358', 'step': 14922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:43.235864', 'step': 14922, 'epoch': 3} {'type': 'loss', 'content': 0.09387186169624329, 'timestamp': '2025-10-01 04:37:43.238296', 'step': 14923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:43.293120', 'step': 14923, 'epoch': 3} {'type': 'loss', 'content': 0.10684691369533539, 'timestamp': '2025-10-01 04:37:43.299897', 'step': 14924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:43.354492', 'step': 14924, 'epoch': 3} {'type': 'loss', 'content': 0.15637141466140747, 'timestamp': '2025-10-01 04:37:43.356921', 'step': 14925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:43.412302', 'step': 14925, 'epoch': 3} {'type': 'loss', 'content': 0.07045823335647583, 'timestamp': '2025-10-01 04:37:43.414593', 'step': 14926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:43.469960', 'step': 14926, 'epoch': 3} {'type': 'loss', 'content': 0.12380930781364441, 'timestamp': '2025-10-01 04:37:43.471963', 'step': 14927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:43.525926', 'step': 14927, 'epoch': 3} {'type': 'loss', 'content': 0.04632771387696266, 'timestamp': '2025-10-01 04:37:43.532364', 'step': 14928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:43.587460', 'step': 14928, 'epoch': 3} {'type': 'loss', 'content': 0.08378815650939941, 'timestamp': '2025-10-01 04:37:43.589558', 'step': 14929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:43.648598', 'step': 14929, 'epoch': 3} {'type': 'loss', 'content': 0.12001220136880875, 'timestamp': '2025-10-01 04:37:43.651320', 'step': 14930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:43.705816', 'step': 14930, 'epoch': 3} {'type': 'loss', 'content': 0.05879218876361847, 'timestamp': '2025-10-01 04:37:43.709821', 'step': 14931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:43.763879', 'step': 14931, 'epoch': 3} {'type': 'loss', 'content': 0.13500404357910156, 'timestamp': '2025-10-01 04:37:43.770294', 'step': 14932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:43.823585', 'step': 14932, 'epoch': 3} {'type': 'loss', 'content': 0.09374088793992996, 'timestamp': '2025-10-01 04:37:43.826109', 'step': 14933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:43.880171', 'step': 14933, 'epoch': 3} {'type': 'loss', 'content': 0.07717229425907135, 'timestamp': '2025-10-01 04:37:43.882591', 'step': 14934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:43.936861', 'step': 14934, 'epoch': 3} {'type': 'loss', 'content': 0.11829524487257004, 'timestamp': '2025-10-01 04:37:43.957355', 'step': 14935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:44.011366', 'step': 14935, 'epoch': 3} {'type': 'loss', 'content': 0.17206034064292908, 'timestamp': '2025-10-01 04:37:44.018015', 'step': 14936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:44.071029', 'step': 14936, 'epoch': 3} {'type': 'loss', 'content': 0.051762163639068604, 'timestamp': '2025-10-01 04:37:44.073332', 'step': 14937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:44.128400', 'step': 14937, 'epoch': 3} {'type': 'loss', 'content': 0.10053808242082596, 'timestamp': '2025-10-01 04:37:44.130803', 'step': 14938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:44.184559', 'step': 14938, 'epoch': 3} {'type': 'loss', 'content': 0.15626280009746552, 'timestamp': '2025-10-01 04:37:44.187026', 'step': 14939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:44.240569', 'step': 14939, 'epoch': 3} {'type': 'loss', 'content': 0.04766311123967171, 'timestamp': '2025-10-01 04:37:44.246900', 'step': 14940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:44.299959', 'step': 14940, 'epoch': 3} {'type': 'loss', 'content': 0.04890589043498039, 'timestamp': '2025-10-01 04:37:44.302672', 'step': 14941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:44.356303', 'step': 14941, 'epoch': 3} {'type': 'loss', 'content': 0.11544496566057205, 'timestamp': '2025-10-01 04:37:44.359686', 'step': 14942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:44.415462', 'step': 14942, 'epoch': 3} {'type': 'loss', 'content': 0.040521290153265, 'timestamp': '2025-10-01 04:37:44.418096', 'step': 14943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:44.480782', 'step': 14943, 'epoch': 3} {'type': 'loss', 'content': 0.11047884821891785, 'timestamp': '2025-10-01 04:37:44.487821', 'step': 14944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:44.543951', 'step': 14944, 'epoch': 3} {'type': 'loss', 'content': 0.05272415652871132, 'timestamp': '2025-10-01 04:37:44.547457', 'step': 14945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:37:44.607806', 'step': 14945, 'epoch': 3} {'type': 'loss', 'content': 0.1293405443429947, 'timestamp': '2025-10-01 04:37:44.611571', 'step': 14946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:44.671104', 'step': 14946, 'epoch': 3} {'type': 'loss', 'content': 0.16445522010326385, 'timestamp': '2025-10-01 04:37:44.673873', 'step': 14947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:44.731014', 'step': 14947, 'epoch': 3} {'type': 'loss', 'content': 0.07876613736152649, 'timestamp': '2025-10-01 04:37:44.749523', 'step': 14948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:44.806661', 'step': 14948, 'epoch': 3} {'type': 'loss', 'content': 0.11894312500953674, 'timestamp': '2025-10-01 04:37:44.820288', 'step': 14949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:44.878762', 'step': 14949, 'epoch': 3} {'type': 'loss', 'content': 0.04170344024896622, 'timestamp': '2025-10-01 04:37:44.882235', 'step': 14950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:44.939184', 'step': 14950, 'epoch': 3} {'type': 'loss', 'content': 0.11635420471429825, 'timestamp': '2025-10-01 04:37:44.941921', 'step': 14951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:45.000074', 'step': 14951, 'epoch': 3} {'type': 'loss', 'content': 0.04885683208703995, 'timestamp': '2025-10-01 04:37:45.007670', 'step': 14952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:45.065088', 'step': 14952, 'epoch': 3} {'type': 'loss', 'content': 0.1800985038280487, 'timestamp': '2025-10-01 04:37:45.067636', 'step': 14953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:45.126935', 'step': 14953, 'epoch': 3} {'type': 'loss', 'content': 0.10494143515825272, 'timestamp': '2025-10-01 04:37:45.130139', 'step': 14954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:45.184864', 'step': 14954, 'epoch': 3} {'type': 'loss', 'content': 0.1499420553445816, 'timestamp': '2025-10-01 04:37:45.188726', 'step': 14955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:45.243171', 'step': 14955, 'epoch': 3} {'type': 'loss', 'content': 0.04400014132261276, 'timestamp': '2025-10-01 04:37:45.250199', 'step': 14956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:45.314908', 'step': 14956, 'epoch': 3} {'type': 'loss', 'content': 0.084617979824543, 'timestamp': '2025-10-01 04:37:45.317646', 'step': 14957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:45.371541', 'step': 14957, 'epoch': 3} {'type': 'loss', 'content': 0.07751135528087616, 'timestamp': '2025-10-01 04:37:45.374130', 'step': 14958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:45.428673', 'step': 14958, 'epoch': 3} {'type': 'loss', 'content': 0.14731165766716003, 'timestamp': '2025-10-01 04:37:45.431699', 'step': 14959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:45.485695', 'step': 14959, 'epoch': 3} {'type': 'loss', 'content': 0.10743389278650284, 'timestamp': '2025-10-01 04:37:45.504798', 'step': 14960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:45.561033', 'step': 14960, 'epoch': 3} {'type': 'loss', 'content': 0.08447258919477463, 'timestamp': '2025-10-01 04:37:45.576025', 'step': 14961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:45.630475', 'step': 14961, 'epoch': 3} {'type': 'loss', 'content': 0.12105780839920044, 'timestamp': '2025-10-01 04:37:45.633051', 'step': 14962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:45.686647', 'step': 14962, 'epoch': 3} {'type': 'loss', 'content': 0.11632679402828217, 'timestamp': '2025-10-01 04:37:45.689884', 'step': 14963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:45.743882', 'step': 14963, 'epoch': 3} {'type': 'loss', 'content': 0.11198072880506516, 'timestamp': '2025-10-01 04:37:45.750440', 'step': 14964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:45.804896', 'step': 14964, 'epoch': 3} {'type': 'loss', 'content': 0.1794281005859375, 'timestamp': '2025-10-01 04:37:45.807374', 'step': 14965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:45.862057', 'step': 14965, 'epoch': 3} {'type': 'loss', 'content': 0.07681223005056381, 'timestamp': '2025-10-01 04:37:45.864703', 'step': 14966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:45.918880', 'step': 14966, 'epoch': 3} {'type': 'loss', 'content': 0.11716319620609283, 'timestamp': '2025-10-01 04:37:45.921437', 'step': 14967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:45.975557', 'step': 14967, 'epoch': 3} {'type': 'loss', 'content': 0.12480726838111877, 'timestamp': '2025-10-01 04:37:45.981865', 'step': 14968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:46.034488', 'step': 14968, 'epoch': 3} {'type': 'loss', 'content': 0.1338951736688614, 'timestamp': '2025-10-01 04:37:46.037758', 'step': 14969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:46.094296', 'step': 14969, 'epoch': 3} {'type': 'loss', 'content': 0.07041595876216888, 'timestamp': '2025-10-01 04:37:46.097523', 'step': 14970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:46.152011', 'step': 14970, 'epoch': 3} {'type': 'loss', 'content': 0.12920048832893372, 'timestamp': '2025-10-01 04:37:46.154505', 'step': 14971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:46.208226', 'step': 14971, 'epoch': 3} {'type': 'loss', 'content': 0.14839774370193481, 'timestamp': '2025-10-01 04:37:46.214368', 'step': 14972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:46.269542', 'step': 14972, 'epoch': 3} {'type': 'loss', 'content': 0.06990690529346466, 'timestamp': '2025-10-01 04:37:46.272221', 'step': 14973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:46.326831', 'step': 14973, 'epoch': 3} {'type': 'loss', 'content': 0.08433569967746735, 'timestamp': '2025-10-01 04:37:46.329349', 'step': 14974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:46.383464', 'step': 14974, 'epoch': 3} {'type': 'loss', 'content': 0.10410311073064804, 'timestamp': '2025-10-01 04:37:46.386097', 'step': 14975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:37:46.439450', 'step': 14975, 'epoch': 3} {'type': 'loss', 'content': 0.07027594745159149, 'timestamp': '2025-10-01 04:37:46.445543', 'step': 14976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:46.503206', 'step': 14976, 'epoch': 3} {'type': 'loss', 'content': 0.0822819322347641, 'timestamp': '2025-10-01 04:37:46.505982', 'step': 14977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:46.569807', 'step': 14977, 'epoch': 3} {'type': 'loss', 'content': 0.12174587696790695, 'timestamp': '2025-10-01 04:37:46.572593', 'step': 14978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:46.626374', 'step': 14978, 'epoch': 3} {'type': 'loss', 'content': 0.1294393539428711, 'timestamp': '2025-10-01 04:37:46.629113', 'step': 14979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:37:46.683526', 'step': 14979, 'epoch': 3} {'type': 'loss', 'content': 0.10321945697069168, 'timestamp': '2025-10-01 04:37:46.689770', 'step': 14980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:46.742566', 'step': 14980, 'epoch': 3} {'type': 'loss', 'content': 0.13621236383914948, 'timestamp': '2025-10-01 04:37:46.745134', 'step': 14981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:46.798558', 'step': 14981, 'epoch': 3} {'type': 'loss', 'content': 0.08249667286872864, 'timestamp': '2025-10-01 04:37:46.801196', 'step': 14982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:46.855528', 'step': 14982, 'epoch': 3} {'type': 'loss', 'content': 0.23887640237808228, 'timestamp': '2025-10-01 04:37:46.859227', 'step': 14983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:46.912866', 'step': 14983, 'epoch': 3} {'type': 'loss', 'content': 0.07996833324432373, 'timestamp': '2025-10-01 04:37:46.919484', 'step': 14984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:46.973039', 'step': 14984, 'epoch': 3} {'type': 'loss', 'content': 0.0449582077562809, 'timestamp': '2025-10-01 04:37:46.975667', 'step': 14985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:37:47.029194', 'step': 14985, 'epoch': 3} {'type': 'loss', 'content': 0.15835240483283997, 'timestamp': '2025-10-01 04:37:47.031868', 'step': 14986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:47.086885', 'step': 14986, 'epoch': 3} {'type': 'loss', 'content': 0.056835394352674484, 'timestamp': '2025-10-01 04:37:47.089812', 'step': 14987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:47.143722', 'step': 14987, 'epoch': 3} {'type': 'loss', 'content': 0.11943238973617554, 'timestamp': '2025-10-01 04:37:47.150988', 'step': 14988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:47.204427', 'step': 14988, 'epoch': 3} {'type': 'loss', 'content': 0.13581494987010956, 'timestamp': '2025-10-01 04:37:47.207197', 'step': 14989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:47.270442', 'step': 14989, 'epoch': 3} {'type': 'loss', 'content': 0.15851835906505585, 'timestamp': '2025-10-01 04:37:47.273320', 'step': 14990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:47.327198', 'step': 14990, 'epoch': 3} {'type': 'loss', 'content': 0.09385570883750916, 'timestamp': '2025-10-01 04:37:47.329879', 'step': 14991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:47.383683', 'step': 14991, 'epoch': 3} {'type': 'loss', 'content': 0.08922472596168518, 'timestamp': '2025-10-01 04:37:47.389816', 'step': 14992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:47.451520', 'step': 14992, 'epoch': 3} {'type': 'loss', 'content': 0.1436501294374466, 'timestamp': '2025-10-01 04:37:47.453849', 'step': 14993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:47.508153', 'step': 14993, 'epoch': 3} {'type': 'loss', 'content': 0.037120867520570755, 'timestamp': '2025-10-01 04:37:47.510600', 'step': 14994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:47.563803', 'step': 14994, 'epoch': 3} {'type': 'loss', 'content': 0.10707712173461914, 'timestamp': '2025-10-01 04:37:47.566392', 'step': 14995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:47.619878', 'step': 14995, 'epoch': 3} {'type': 'loss', 'content': 0.11259637773036957, 'timestamp': '2025-10-01 04:37:47.640647', 'step': 14996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:47.693366', 'step': 14996, 'epoch': 3} {'type': 'loss', 'content': 0.07006120681762695, 'timestamp': '2025-10-01 04:37:47.698714', 'step': 14997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:47.752445', 'step': 14997, 'epoch': 3} {'type': 'loss', 'content': 0.06726261228322983, 'timestamp': '2025-10-01 04:37:47.755406', 'step': 14998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:47.809148', 'step': 14998, 'epoch': 3} {'type': 'loss', 'content': 0.07599452883005142, 'timestamp': '2025-10-01 04:37:47.811564', 'step': 14999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:47.865177', 'step': 14999, 'epoch': 3} {'type': 'loss', 'content': 0.10828255116939545, 'timestamp': '2025-10-01 04:37:47.871596', 'step': 15000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 15000', 'timestamp': '2025-10-01 04:37:48.253011', 'step': 15000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:37:48.308426', 'step': 15000, 'epoch': 3} {'type': 'loss', 'content': 0.10355015844106674, 'timestamp': '2025-10-01 04:37:48.310814', 'step': 15001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:48.364591', 'step': 15001, 'epoch': 3} {'type': 'loss', 'content': 0.10448913276195526, 'timestamp': '2025-10-01 04:37:48.367205', 'step': 15002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:48.420526', 'step': 15002, 'epoch': 3} {'type': 'loss', 'content': 0.08440385013818741, 'timestamp': '2025-10-01 04:37:48.422969', 'step': 15003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:48.476479', 'step': 15003, 'epoch': 3} {'type': 'loss', 'content': 0.11099644005298615, 'timestamp': '2025-10-01 04:37:48.482903', 'step': 15004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:48.535950', 'step': 15004, 'epoch': 3} {'type': 'loss', 'content': 0.06410349905490875, 'timestamp': '2025-10-01 04:37:48.538308', 'step': 15005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:48.591177', 'step': 15005, 'epoch': 3} {'type': 'loss', 'content': 0.05600285902619362, 'timestamp': '2025-10-01 04:37:48.593536', 'step': 15006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:48.651494', 'step': 15006, 'epoch': 3} {'type': 'loss', 'content': 0.10100700706243515, 'timestamp': '2025-10-01 04:37:48.653998', 'step': 15007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:48.707147', 'step': 15007, 'epoch': 3} {'type': 'loss', 'content': 0.11380831152200699, 'timestamp': '2025-10-01 04:37:48.713459', 'step': 15008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:48.766676', 'step': 15008, 'epoch': 3} {'type': 'loss', 'content': 0.06552987545728683, 'timestamp': '2025-10-01 04:37:48.769016', 'step': 15009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:48.824611', 'step': 15009, 'epoch': 3} {'type': 'loss', 'content': 0.17779450118541718, 'timestamp': '2025-10-01 04:37:48.831131', 'step': 15010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:48.884693', 'step': 15010, 'epoch': 3} {'type': 'loss', 'content': 0.06636523455381393, 'timestamp': '2025-10-01 04:37:48.887080', 'step': 15011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:48.941212', 'step': 15011, 'epoch': 3} {'type': 'loss', 'content': 0.07794923335313797, 'timestamp': '2025-10-01 04:37:48.947657', 'step': 15012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:49.001896', 'step': 15012, 'epoch': 3} {'type': 'loss', 'content': 0.11667578667402267, 'timestamp': '2025-10-01 04:37:49.005542', 'step': 15013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:37:49.060578', 'step': 15013, 'epoch': 3} {'type': 'loss', 'content': 0.07024841755628586, 'timestamp': '2025-10-01 04:37:49.063072', 'step': 15014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:49.118673', 'step': 15014, 'epoch': 3} {'type': 'loss', 'content': 0.0590822696685791, 'timestamp': '2025-10-01 04:37:49.122593', 'step': 15015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:49.176623', 'step': 15015, 'epoch': 3} {'type': 'loss', 'content': 0.11939728260040283, 'timestamp': '2025-10-01 04:37:49.184152', 'step': 15016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:49.237112', 'step': 15016, 'epoch': 3} {'type': 'loss', 'content': 0.0793721079826355, 'timestamp': '2025-10-01 04:37:49.239660', 'step': 15017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:49.293686', 'step': 15017, 'epoch': 3} {'type': 'loss', 'content': 0.11111211031675339, 'timestamp': '2025-10-01 04:37:49.295961', 'step': 15018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:49.349580', 'step': 15018, 'epoch': 3} {'type': 'loss', 'content': 0.07666594535112381, 'timestamp': '2025-10-01 04:37:49.352081', 'step': 15019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:49.407491', 'step': 15019, 'epoch': 3} {'type': 'loss', 'content': 0.09860361367464066, 'timestamp': '2025-10-01 04:37:49.414236', 'step': 15020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:49.468474', 'step': 15020, 'epoch': 3} {'type': 'loss', 'content': 0.21944475173950195, 'timestamp': '2025-10-01 04:37:49.471556', 'step': 15021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:49.527571', 'step': 15021, 'epoch': 3} {'type': 'loss', 'content': 0.07069884985685349, 'timestamp': '2025-10-01 04:37:49.530387', 'step': 15022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:49.585262', 'step': 15022, 'epoch': 3} {'type': 'loss', 'content': 0.10825403779745102, 'timestamp': '2025-10-01 04:37:49.588101', 'step': 15023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:49.645495', 'step': 15023, 'epoch': 3} {'type': 'loss', 'content': 0.15153075754642487, 'timestamp': '2025-10-01 04:37:49.651863', 'step': 15024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:49.705474', 'step': 15024, 'epoch': 3} {'type': 'loss', 'content': 0.11808153986930847, 'timestamp': '2025-10-01 04:37:49.707904', 'step': 15025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:49.762106', 'step': 15025, 'epoch': 3} {'type': 'loss', 'content': 0.09555085003376007, 'timestamp': '2025-10-01 04:37:49.764557', 'step': 15026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:49.818629', 'step': 15026, 'epoch': 3} {'type': 'loss', 'content': 0.11596378684043884, 'timestamp': '2025-10-01 04:37:49.821241', 'step': 15027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:49.875572', 'step': 15027, 'epoch': 3} {'type': 'loss', 'content': 0.14225630462169647, 'timestamp': '2025-10-01 04:37:49.881785', 'step': 15028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:37:49.940459', 'step': 15028, 'epoch': 3} {'type': 'loss', 'content': 0.07054130733013153, 'timestamp': '2025-10-01 04:37:49.943442', 'step': 15029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:49.997587', 'step': 15029, 'epoch': 3} {'type': 'loss', 'content': 0.15007881820201874, 'timestamp': '2025-10-01 04:37:49.999906', 'step': 15030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:50.054298', 'step': 15030, 'epoch': 3} {'type': 'loss', 'content': 0.06881874054670334, 'timestamp': '2025-10-01 04:37:50.056783', 'step': 15031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:50.111116', 'step': 15031, 'epoch': 3} {'type': 'loss', 'content': 0.08269888162612915, 'timestamp': '2025-10-01 04:37:50.117190', 'step': 15032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:50.171081', 'step': 15032, 'epoch': 3} {'type': 'loss', 'content': 0.11863402277231216, 'timestamp': '2025-10-01 04:37:50.173706', 'step': 15033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:50.228021', 'step': 15033, 'epoch': 3} {'type': 'loss', 'content': 0.20495514571666718, 'timestamp': '2025-10-01 04:37:50.230252', 'step': 15034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:37:50.284841', 'step': 15034, 'epoch': 3} {'type': 'loss', 'content': 0.06028478220105171, 'timestamp': '2025-10-01 04:37:50.287455', 'step': 15035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:50.341506', 'step': 15035, 'epoch': 3} {'type': 'loss', 'content': 0.17601074278354645, 'timestamp': '2025-10-01 04:37:50.347423', 'step': 15036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:50.400272', 'step': 15036, 'epoch': 3} {'type': 'loss', 'content': 0.12872692942619324, 'timestamp': '2025-10-01 04:37:50.403001', 'step': 15037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:50.458151', 'step': 15037, 'epoch': 3} {'type': 'loss', 'content': 0.15233683586120605, 'timestamp': '2025-10-01 04:37:50.460411', 'step': 15038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:50.514659', 'step': 15038, 'epoch': 3} {'type': 'loss', 'content': 0.07367509603500366, 'timestamp': '2025-10-01 04:37:50.517765', 'step': 15039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:50.571636', 'step': 15039, 'epoch': 3} {'type': 'loss', 'content': 0.1976124793291092, 'timestamp': '2025-10-01 04:37:50.577813', 'step': 15040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:50.630943', 'step': 15040, 'epoch': 3} {'type': 'loss', 'content': 0.12501655519008636, 'timestamp': '2025-10-01 04:37:50.633262', 'step': 15041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:37:50.687534', 'step': 15041, 'epoch': 3} {'type': 'loss', 'content': 0.10553748160600662, 'timestamp': '2025-10-01 04:37:50.689940', 'step': 15042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:37:50.743384', 'step': 15042, 'epoch': 3} {'type': 'loss', 'content': 0.2159213423728943, 'timestamp': '2025-10-01 04:37:50.745632', 'step': 15043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:50.799359', 'step': 15043, 'epoch': 3} {'type': 'loss', 'content': 0.0970572754740715, 'timestamp': '2025-10-01 04:37:50.805400', 'step': 15044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:50.857775', 'step': 15044, 'epoch': 3} {'type': 'loss', 'content': 0.03735724464058876, 'timestamp': '2025-10-01 04:37:50.860098', 'step': 15045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:50.913077', 'step': 15045, 'epoch': 3} {'type': 'loss', 'content': 0.059233538806438446, 'timestamp': '2025-10-01 04:37:50.915227', 'step': 15046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:50.968726', 'step': 15046, 'epoch': 3} {'type': 'loss', 'content': 0.12682005763053894, 'timestamp': '2025-10-01 04:37:50.971406', 'step': 15047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:51.027633', 'step': 15047, 'epoch': 3} {'type': 'loss', 'content': 0.24309444427490234, 'timestamp': '2025-10-01 04:37:51.033214', 'step': 15048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:51.086902', 'step': 15048, 'epoch': 3} {'type': 'loss', 'content': 0.14826565980911255, 'timestamp': '2025-10-01 04:37:51.089633', 'step': 15049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:51.142791', 'step': 15049, 'epoch': 3} {'type': 'loss', 'content': 0.10707008838653564, 'timestamp': '2025-10-01 04:37:51.145177', 'step': 15050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:51.200101', 'step': 15050, 'epoch': 3} {'type': 'loss', 'content': 0.08217894285917282, 'timestamp': '2025-10-01 04:37:51.202361', 'step': 15051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:51.255555', 'step': 15051, 'epoch': 3} {'type': 'loss', 'content': 0.09239009767770767, 'timestamp': '2025-10-01 04:37:51.261393', 'step': 15052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:51.315061', 'step': 15052, 'epoch': 3} {'type': 'loss', 'content': 0.18963022530078888, 'timestamp': '2025-10-01 04:37:51.317262', 'step': 15053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:51.370318', 'step': 15053, 'epoch': 3} {'type': 'loss', 'content': 0.16692960262298584, 'timestamp': '2025-10-01 04:37:51.373540', 'step': 15054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:51.427332', 'step': 15054, 'epoch': 3} {'type': 'loss', 'content': 0.08558665215969086, 'timestamp': '2025-10-01 04:37:51.429876', 'step': 15055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:51.483589', 'step': 15055, 'epoch': 3} {'type': 'loss', 'content': 0.09324309229850769, 'timestamp': '2025-10-01 04:37:51.489485', 'step': 15056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:51.542679', 'step': 15056, 'epoch': 3} {'type': 'loss', 'content': 0.09897349029779434, 'timestamp': '2025-10-01 04:37:51.546423', 'step': 15057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:51.600275', 'step': 15057, 'epoch': 3} {'type': 'loss', 'content': 0.12299924343824387, 'timestamp': '2025-10-01 04:37:51.602549', 'step': 15058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:51.656339', 'step': 15058, 'epoch': 3} {'type': 'loss', 'content': 0.11512017995119095, 'timestamp': '2025-10-01 04:37:51.658537', 'step': 15059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:37:51.712132', 'step': 15059, 'epoch': 3} {'type': 'loss', 'content': 0.11811095476150513, 'timestamp': '2025-10-01 04:37:51.717995', 'step': 15060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:51.771406', 'step': 15060, 'epoch': 3} {'type': 'loss', 'content': 0.16706308722496033, 'timestamp': '2025-10-01 04:37:51.773786', 'step': 15061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:51.829074', 'step': 15061, 'epoch': 3} {'type': 'loss', 'content': 0.09304749965667725, 'timestamp': '2025-10-01 04:37:51.831849', 'step': 15062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:51.885576', 'step': 15062, 'epoch': 3} {'type': 'loss', 'content': 0.17194172739982605, 'timestamp': '2025-10-01 04:37:51.888296', 'step': 15063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:51.942190', 'step': 15063, 'epoch': 3} {'type': 'loss', 'content': 0.1650557667016983, 'timestamp': '2025-10-01 04:37:51.961064', 'step': 15064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:52.013924', 'step': 15064, 'epoch': 3} {'type': 'loss', 'content': 0.12748628854751587, 'timestamp': '2025-10-01 04:37:52.016435', 'step': 15065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:52.070644', 'step': 15065, 'epoch': 3} {'type': 'loss', 'content': 0.15996108949184418, 'timestamp': '2025-10-01 04:37:52.072920', 'step': 15066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:52.126475', 'step': 15066, 'epoch': 3} {'type': 'loss', 'content': 0.12008538097143173, 'timestamp': '2025-10-01 04:37:52.128706', 'step': 15067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:52.182290', 'step': 15067, 'epoch': 3} {'type': 'loss', 'content': 0.1104637086391449, 'timestamp': '2025-10-01 04:37:52.188048', 'step': 15068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:37:52.241097', 'step': 15068, 'epoch': 3} {'type': 'loss', 'content': 0.05430857092142105, 'timestamp': '2025-10-01 04:37:52.243543', 'step': 15069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:52.296843', 'step': 15069, 'epoch': 3} {'type': 'loss', 'content': 0.11313346773386002, 'timestamp': '2025-10-01 04:37:52.299177', 'step': 15070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:37:52.353309', 'step': 15070, 'epoch': 3} {'type': 'loss', 'content': 0.08748912066221237, 'timestamp': '2025-10-01 04:37:52.355573', 'step': 15071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:37:52.409456', 'step': 15071, 'epoch': 3} {'type': 'loss', 'content': 0.060097504407167435, 'timestamp': '2025-10-01 04:37:52.415247', 'step': 15072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:52.471051', 'step': 15072, 'epoch': 3} {'type': 'loss', 'content': 0.09278027713298798, 'timestamp': '2025-10-01 04:37:52.473314', 'step': 15073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:52.526674', 'step': 15073, 'epoch': 3} {'type': 'loss', 'content': 0.1748601198196411, 'timestamp': '2025-10-01 04:37:52.529385', 'step': 15074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:37:52.583286', 'step': 15074, 'epoch': 3} {'type': 'loss', 'content': 0.23543408513069153, 'timestamp': '2025-10-01 04:37:52.585554', 'step': 15075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:52.638915', 'step': 15075, 'epoch': 3} {'type': 'loss', 'content': 0.13611680269241333, 'timestamp': '2025-10-01 04:37:52.644760', 'step': 15076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:52.698749', 'step': 15076, 'epoch': 3} {'type': 'loss', 'content': 0.03488006815314293, 'timestamp': '2025-10-01 04:37:52.701096', 'step': 15077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:52.754381', 'step': 15077, 'epoch': 3} {'type': 'loss', 'content': 0.17385923862457275, 'timestamp': '2025-10-01 04:37:52.756760', 'step': 15078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:52.810823', 'step': 15078, 'epoch': 3} {'type': 'loss', 'content': 0.09546621888875961, 'timestamp': '2025-10-01 04:37:52.831847', 'step': 15079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:52.885389', 'step': 15079, 'epoch': 3} {'type': 'loss', 'content': 0.10681910812854767, 'timestamp': '2025-10-01 04:37:52.891640', 'step': 15080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:37:52.944342', 'step': 15080, 'epoch': 3} {'type': 'loss', 'content': 0.14817176759243011, 'timestamp': '2025-10-01 04:37:52.946563', 'step': 15081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:53.001623', 'step': 15081, 'epoch': 3} {'type': 'loss', 'content': 0.19025535881519318, 'timestamp': '2025-10-01 04:37:53.004725', 'step': 15082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:53.059562', 'step': 15082, 'epoch': 3} {'type': 'loss', 'content': 0.08624454587697983, 'timestamp': '2025-10-01 04:37:53.062319', 'step': 15083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:53.117867', 'step': 15083, 'epoch': 3} {'type': 'loss', 'content': 0.1070285439491272, 'timestamp': '2025-10-01 04:37:53.123655', 'step': 15084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:53.176750', 'step': 15084, 'epoch': 3} {'type': 'loss', 'content': 0.1366320550441742, 'timestamp': '2025-10-01 04:37:53.180439', 'step': 15085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:37:53.235065', 'step': 15085, 'epoch': 3} {'type': 'loss', 'content': 0.09443686902523041, 'timestamp': '2025-10-01 04:37:53.237969', 'step': 15086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:53.291243', 'step': 15086, 'epoch': 3} {'type': 'loss', 'content': 0.0757107138633728, 'timestamp': '2025-10-01 04:37:53.293559', 'step': 15087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:53.346816', 'step': 15087, 'epoch': 3} {'type': 'loss', 'content': 0.08160898834466934, 'timestamp': '2025-10-01 04:37:53.352619', 'step': 15088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:53.405914', 'step': 15088, 'epoch': 3} {'type': 'loss', 'content': 0.06050759553909302, 'timestamp': '2025-10-01 04:37:53.418490', 'step': 15089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:53.472010', 'step': 15089, 'epoch': 3} {'type': 'loss', 'content': 0.09906599670648575, 'timestamp': '2025-10-01 04:37:53.474372', 'step': 15090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:53.528440', 'step': 15090, 'epoch': 3} {'type': 'loss', 'content': 0.08859112858772278, 'timestamp': '2025-10-01 04:37:53.536193', 'step': 15091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:53.591124', 'step': 15091, 'epoch': 3} {'type': 'loss', 'content': 0.16906382143497467, 'timestamp': '2025-10-01 04:37:53.597159', 'step': 15092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:53.650364', 'step': 15092, 'epoch': 3} {'type': 'loss', 'content': 0.1976444125175476, 'timestamp': '2025-10-01 04:37:53.653696', 'step': 15093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:53.707343', 'step': 15093, 'epoch': 3} {'type': 'loss', 'content': 0.122977614402771, 'timestamp': '2025-10-01 04:37:53.709443', 'step': 15094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:53.763674', 'step': 15094, 'epoch': 3} {'type': 'loss', 'content': 0.1558825522661209, 'timestamp': '2025-10-01 04:37:53.766251', 'step': 15095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:53.821453', 'step': 15095, 'epoch': 3} {'type': 'loss', 'content': 0.17545799911022186, 'timestamp': '2025-10-01 04:37:53.827461', 'step': 15096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:53.880216', 'step': 15096, 'epoch': 3} {'type': 'loss', 'content': 0.13456745445728302, 'timestamp': '2025-10-01 04:37:53.882785', 'step': 15097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:53.948580', 'step': 15097, 'epoch': 3} {'type': 'loss', 'content': 0.0407562330365181, 'timestamp': '2025-10-01 04:37:53.950867', 'step': 15098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:54.004917', 'step': 15098, 'epoch': 3} {'type': 'loss', 'content': 0.1737031787633896, 'timestamp': '2025-10-01 04:37:54.007178', 'step': 15099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:54.061456', 'step': 15099, 'epoch': 3} {'type': 'loss', 'content': 0.08413663506507874, 'timestamp': '2025-10-01 04:37:54.067199', 'step': 15100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:37:54.120717', 'step': 15100, 'epoch': 3} {'type': 'loss', 'content': 0.1284569799900055, 'timestamp': '2025-10-01 04:37:54.122920', 'step': 15101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:54.176692', 'step': 15101, 'epoch': 3} {'type': 'loss', 'content': 0.043664220720529556, 'timestamp': '2025-10-01 04:37:54.178925', 'step': 15102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:54.233715', 'step': 15102, 'epoch': 3} {'type': 'loss', 'content': 0.16216805577278137, 'timestamp': '2025-10-01 04:37:54.236038', 'step': 15103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:54.307877', 'step': 15103, 'epoch': 3} {'type': 'loss', 'content': 0.06348860263824463, 'timestamp': '2025-10-01 04:37:54.313766', 'step': 15104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:37:54.367108', 'step': 15104, 'epoch': 3} {'type': 'loss', 'content': 0.1094079539179802, 'timestamp': '2025-10-01 04:37:54.369219', 'step': 15105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:54.423248', 'step': 15105, 'epoch': 3} {'type': 'loss', 'content': 0.0497901551425457, 'timestamp': '2025-10-01 04:37:54.425628', 'step': 15106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:54.479295', 'step': 15106, 'epoch': 3} {'type': 'loss', 'content': 0.11896689236164093, 'timestamp': '2025-10-01 04:37:54.481670', 'step': 15107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:54.536495', 'step': 15107, 'epoch': 3} {'type': 'loss', 'content': 0.11808811873197556, 'timestamp': '2025-10-01 04:37:54.542245', 'step': 15108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:37:54.595930', 'step': 15108, 'epoch': 3} {'type': 'loss', 'content': 0.12925340235233307, 'timestamp': '2025-10-01 04:37:54.598139', 'step': 15109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:54.651582', 'step': 15109, 'epoch': 3} {'type': 'loss', 'content': 0.09909836947917938, 'timestamp': '2025-10-01 04:37:54.653789', 'step': 15110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:54.707730', 'step': 15110, 'epoch': 3} {'type': 'loss', 'content': 0.05817326903343201, 'timestamp': '2025-10-01 04:37:54.709987', 'step': 15111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:54.762580', 'step': 15111, 'epoch': 3} {'type': 'loss', 'content': 0.1348893791437149, 'timestamp': '2025-10-01 04:37:54.768461', 'step': 15112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:54.821622', 'step': 15112, 'epoch': 3} {'type': 'loss', 'content': 0.12284611165523529, 'timestamp': '2025-10-01 04:37:54.823822', 'step': 15113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:54.877321', 'step': 15113, 'epoch': 3} {'type': 'loss', 'content': 0.1466587632894516, 'timestamp': '2025-10-01 04:37:54.879910', 'step': 15114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:54.937243', 'step': 15114, 'epoch': 3} {'type': 'loss', 'content': 0.1156260296702385, 'timestamp': '2025-10-01 04:37:54.939571', 'step': 15115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:55.001818', 'step': 15115, 'epoch': 3} {'type': 'loss', 'content': 0.07786170393228531, 'timestamp': '2025-10-01 04:37:55.007594', 'step': 15116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:55.060376', 'step': 15116, 'epoch': 3} {'type': 'loss', 'content': 0.11874855309724808, 'timestamp': '2025-10-01 04:37:55.062648', 'step': 15117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:55.116108', 'step': 15117, 'epoch': 3} {'type': 'loss', 'content': 0.11432473361492157, 'timestamp': '2025-10-01 04:37:55.118548', 'step': 15118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:55.172459', 'step': 15118, 'epoch': 3} {'type': 'loss', 'content': 0.06628187745809555, 'timestamp': '2025-10-01 04:37:55.175436', 'step': 15119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:55.232817', 'step': 15119, 'epoch': 3} {'type': 'loss', 'content': 0.12216674536466599, 'timestamp': '2025-10-01 04:37:55.238657', 'step': 15120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:55.292338', 'step': 15120, 'epoch': 3} {'type': 'loss', 'content': 0.18801534175872803, 'timestamp': '2025-10-01 04:37:55.295574', 'step': 15121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:55.349901', 'step': 15121, 'epoch': 3} {'type': 'loss', 'content': 0.07523665577173233, 'timestamp': '2025-10-01 04:37:55.353472', 'step': 15122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:55.410379', 'step': 15122, 'epoch': 3} {'type': 'loss', 'content': 0.10291555523872375, 'timestamp': '2025-10-01 04:37:55.412737', 'step': 15123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:55.469686', 'step': 15123, 'epoch': 3} {'type': 'loss', 'content': 0.10099827498197556, 'timestamp': '2025-10-01 04:37:55.475725', 'step': 15124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:55.528939', 'step': 15124, 'epoch': 3} {'type': 'loss', 'content': 0.1635049432516098, 'timestamp': '2025-10-01 04:37:55.531434', 'step': 15125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:55.585385', 'step': 15125, 'epoch': 3} {'type': 'loss', 'content': 0.08691390603780746, 'timestamp': '2025-10-01 04:37:55.587650', 'step': 15126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:55.650534', 'step': 15126, 'epoch': 3} {'type': 'loss', 'content': 0.1372147649526596, 'timestamp': '2025-10-01 04:37:55.653033', 'step': 15127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:55.707019', 'step': 15127, 'epoch': 3} {'type': 'loss', 'content': 0.14580261707305908, 'timestamp': '2025-10-01 04:37:55.712770', 'step': 15128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:55.765783', 'step': 15128, 'epoch': 3} {'type': 'loss', 'content': 0.09397141635417938, 'timestamp': '2025-10-01 04:37:55.768036', 'step': 15129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:55.831507', 'step': 15129, 'epoch': 3} {'type': 'loss', 'content': 0.22865606844425201, 'timestamp': '2025-10-01 04:37:55.833749', 'step': 15130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:55.887451', 'step': 15130, 'epoch': 3} {'type': 'loss', 'content': 0.07884059101343155, 'timestamp': '2025-10-01 04:37:55.889717', 'step': 15131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:55.944669', 'step': 15131, 'epoch': 3} {'type': 'loss', 'content': 0.14955805242061615, 'timestamp': '2025-10-01 04:37:55.950590', 'step': 15132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:56.003226', 'step': 15132, 'epoch': 3} {'type': 'loss', 'content': 0.11785888671875, 'timestamp': '2025-10-01 04:37:56.005555', 'step': 15133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:56.059381', 'step': 15133, 'epoch': 3} {'type': 'loss', 'content': 0.22141008079051971, 'timestamp': '2025-10-01 04:37:56.061654', 'step': 15134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:56.115987', 'step': 15134, 'epoch': 3} {'type': 'loss', 'content': 0.10328727215528488, 'timestamp': '2025-10-01 04:37:56.118884', 'step': 15135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:56.172757', 'step': 15135, 'epoch': 3} {'type': 'loss', 'content': 0.17012031376361847, 'timestamp': '2025-10-01 04:37:56.178685', 'step': 15136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:56.231734', 'step': 15136, 'epoch': 3} {'type': 'loss', 'content': 0.09897057712078094, 'timestamp': '2025-10-01 04:37:56.233930', 'step': 15137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:56.287205', 'step': 15137, 'epoch': 3} {'type': 'loss', 'content': 0.0960126668214798, 'timestamp': '2025-10-01 04:37:56.299174', 'step': 15138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:56.354788', 'step': 15138, 'epoch': 3} {'type': 'loss', 'content': 0.1564149409532547, 'timestamp': '2025-10-01 04:37:56.357454', 'step': 15139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:56.411445', 'step': 15139, 'epoch': 3} {'type': 'loss', 'content': 0.1168224886059761, 'timestamp': '2025-10-01 04:37:56.417883', 'step': 15140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:56.484841', 'step': 15140, 'epoch': 3} {'type': 'loss', 'content': 0.1361192762851715, 'timestamp': '2025-10-01 04:37:56.487182', 'step': 15141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:56.541245', 'step': 15141, 'epoch': 3} {'type': 'loss', 'content': 0.07188599556684494, 'timestamp': '2025-10-01 04:37:56.544988', 'step': 15142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:56.609935', 'step': 15142, 'epoch': 3} {'type': 'loss', 'content': 0.16866540908813477, 'timestamp': '2025-10-01 04:37:56.612514', 'step': 15143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:56.666124', 'step': 15143, 'epoch': 3} {'type': 'loss', 'content': 0.06631629914045334, 'timestamp': '2025-10-01 04:37:56.672098', 'step': 15144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:56.754326', 'step': 15144, 'epoch': 3} {'type': 'loss', 'content': 0.08719402551651001, 'timestamp': '2025-10-01 04:37:56.757082', 'step': 15145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:56.823565', 'step': 15145, 'epoch': 3} {'type': 'loss', 'content': 0.12923425436019897, 'timestamp': '2025-10-01 04:37:56.826442', 'step': 15146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:56.885293', 'step': 15146, 'epoch': 3} {'type': 'loss', 'content': 0.13987649977207184, 'timestamp': '2025-10-01 04:37:56.887692', 'step': 15147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:56.941684', 'step': 15147, 'epoch': 3} {'type': 'loss', 'content': 0.08058267831802368, 'timestamp': '2025-10-01 04:37:56.947452', 'step': 15148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:57.000932', 'step': 15148, 'epoch': 3} {'type': 'loss', 'content': 0.1780806928873062, 'timestamp': '2025-10-01 04:37:57.007425', 'step': 15149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:57.080757', 'step': 15149, 'epoch': 3} {'type': 'loss', 'content': 0.04726095870137215, 'timestamp': '2025-10-01 04:37:57.083113', 'step': 15150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:57.137125', 'step': 15150, 'epoch': 3} {'type': 'loss', 'content': 0.08518576622009277, 'timestamp': '2025-10-01 04:37:57.143688', 'step': 15151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:57.202158', 'step': 15151, 'epoch': 3} {'type': 'loss', 'content': 0.1410946249961853, 'timestamp': '2025-10-01 04:37:57.207989', 'step': 15152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:57.261541', 'step': 15152, 'epoch': 3} {'type': 'loss', 'content': 0.11910291016101837, 'timestamp': '2025-10-01 04:37:57.263765', 'step': 15153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:57.317417', 'step': 15153, 'epoch': 3} {'type': 'loss', 'content': 0.1006428673863411, 'timestamp': '2025-10-01 04:37:57.321311', 'step': 15154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:37:57.374351', 'step': 15154, 'epoch': 3} {'type': 'loss', 'content': 0.17446716129779816, 'timestamp': '2025-10-01 04:37:57.384069', 'step': 15155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:37:57.450949', 'step': 15155, 'epoch': 3} {'type': 'loss', 'content': 0.07878678292036057, 'timestamp': '2025-10-01 04:37:57.456764', 'step': 15156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:57.510056', 'step': 15156, 'epoch': 3} {'type': 'loss', 'content': 0.13383466005325317, 'timestamp': '2025-10-01 04:37:57.512783', 'step': 15157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:57.567194', 'step': 15157, 'epoch': 3} {'type': 'loss', 'content': 0.18148095905780792, 'timestamp': '2025-10-01 04:37:57.576625', 'step': 15158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:57.631163', 'step': 15158, 'epoch': 3} {'type': 'loss', 'content': 0.14006105065345764, 'timestamp': '2025-10-01 04:37:57.633559', 'step': 15159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:57.689318', 'step': 15159, 'epoch': 3} {'type': 'loss', 'content': 0.03326501324772835, 'timestamp': '2025-10-01 04:37:57.696315', 'step': 15160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:57.750327', 'step': 15160, 'epoch': 3} {'type': 'loss', 'content': 0.16447927057743073, 'timestamp': '2025-10-01 04:37:57.753181', 'step': 15161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:57.809837', 'step': 15161, 'epoch': 3} {'type': 'loss', 'content': 0.048631004989147186, 'timestamp': '2025-10-01 04:37:57.812556', 'step': 15162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:57.868007', 'step': 15162, 'epoch': 3} {'type': 'loss', 'content': 0.24892354011535645, 'timestamp': '2025-10-01 04:37:57.884166', 'step': 15163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:57.938588', 'step': 15163, 'epoch': 3} {'type': 'loss', 'content': 0.1469593048095703, 'timestamp': '2025-10-01 04:37:57.944668', 'step': 15164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:57.998977', 'step': 15164, 'epoch': 3} {'type': 'loss', 'content': 0.17547142505645752, 'timestamp': '2025-10-01 04:37:58.001839', 'step': 15165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:58.057180', 'step': 15165, 'epoch': 3} {'type': 'loss', 'content': 0.1777786761522293, 'timestamp': '2025-10-01 04:37:58.059952', 'step': 15166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:58.115816', 'step': 15166, 'epoch': 3} {'type': 'loss', 'content': 0.0534152053296566, 'timestamp': '2025-10-01 04:37:58.118329', 'step': 15167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:58.174060', 'step': 15167, 'epoch': 3} {'type': 'loss', 'content': 0.09044495224952698, 'timestamp': '2025-10-01 04:37:58.197913', 'step': 15168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:58.254375', 'step': 15168, 'epoch': 3} {'type': 'loss', 'content': 0.11055810004472733, 'timestamp': '2025-10-01 04:37:58.257086', 'step': 15169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:58.329571', 'step': 15169, 'epoch': 3} {'type': 'loss', 'content': 0.12659259140491486, 'timestamp': '2025-10-01 04:37:58.332163', 'step': 15170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:58.387757', 'step': 15170, 'epoch': 3} {'type': 'loss', 'content': 0.07368208467960358, 'timestamp': '2025-10-01 04:37:58.390629', 'step': 15171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:58.446330', 'step': 15171, 'epoch': 3} {'type': 'loss', 'content': 0.1144113689661026, 'timestamp': '2025-10-01 04:37:58.453210', 'step': 15172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:58.507958', 'step': 15172, 'epoch': 3} {'type': 'loss', 'content': 0.16532371938228607, 'timestamp': '2025-10-01 04:37:58.510139', 'step': 15173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:58.565245', 'step': 15173, 'epoch': 3} {'type': 'loss', 'content': 0.06475116312503815, 'timestamp': '2025-10-01 04:37:58.567605', 'step': 15174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:58.622032', 'step': 15174, 'epoch': 3} {'type': 'loss', 'content': 0.07615413516759872, 'timestamp': '2025-10-01 04:37:58.624533', 'step': 15175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:58.679182', 'step': 15175, 'epoch': 3} {'type': 'loss', 'content': 0.05813530460000038, 'timestamp': '2025-10-01 04:37:58.685369', 'step': 15176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:58.738917', 'step': 15176, 'epoch': 3} {'type': 'loss', 'content': 0.12912507355213165, 'timestamp': '2025-10-01 04:37:58.742253', 'step': 15177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:58.798533', 'step': 15177, 'epoch': 3} {'type': 'loss', 'content': 0.10653862357139587, 'timestamp': '2025-10-01 04:37:58.801209', 'step': 15178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:58.855445', 'step': 15178, 'epoch': 3} {'type': 'loss', 'content': 0.07968296110630035, 'timestamp': '2025-10-01 04:37:58.858373', 'step': 15179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:58.913066', 'step': 15179, 'epoch': 3} {'type': 'loss', 'content': 0.17863278090953827, 'timestamp': '2025-10-01 04:37:58.919501', 'step': 15180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:58.973227', 'step': 15180, 'epoch': 3} {'type': 'loss', 'content': 0.0939735397696495, 'timestamp': '2025-10-01 04:37:58.977097', 'step': 15181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:59.032039', 'step': 15181, 'epoch': 3} {'type': 'loss', 'content': 0.06574137508869171, 'timestamp': '2025-10-01 04:37:59.035153', 'step': 15182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:59.090548', 'step': 15182, 'epoch': 3} {'type': 'loss', 'content': 0.09551636129617691, 'timestamp': '2025-10-01 04:37:59.093261', 'step': 15183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-01 04:37:59.150004', 'step': 15183, 'epoch': 3} {'type': 'loss', 'content': 0.0867093876004219, 'timestamp': '2025-10-01 04:37:59.156777', 'step': 15184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:59.222168', 'step': 15184, 'epoch': 3} {'type': 'loss', 'content': 0.11698925495147705, 'timestamp': '2025-10-01 04:37:59.225047', 'step': 15185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:59.279765', 'step': 15185, 'epoch': 3} {'type': 'loss', 'content': 0.07723366469144821, 'timestamp': '2025-10-01 04:37:59.282287', 'step': 15186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:59.336133', 'step': 15186, 'epoch': 3} {'type': 'loss', 'content': 0.07655014097690582, 'timestamp': '2025-10-01 04:37:59.338874', 'step': 15187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:59.393244', 'step': 15187, 'epoch': 3} {'type': 'loss', 'content': 0.10371604561805725, 'timestamp': '2025-10-01 04:37:59.399983', 'step': 15188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:59.465182', 'step': 15188, 'epoch': 3} {'type': 'loss', 'content': 0.08809618651866913, 'timestamp': '2025-10-01 04:37:59.467459', 'step': 15189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:59.520627', 'step': 15189, 'epoch': 3} {'type': 'loss', 'content': 0.060754768550395966, 'timestamp': '2025-10-01 04:37:59.524653', 'step': 15190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:37:59.582641', 'step': 15190, 'epoch': 3} {'type': 'loss', 'content': 0.16847370564937592, 'timestamp': '2025-10-01 04:37:59.585589', 'step': 15191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:59.641850', 'step': 15191, 'epoch': 3} {'type': 'loss', 'content': 0.16864627599716187, 'timestamp': '2025-10-01 04:37:59.648219', 'step': 15192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:59.703384', 'step': 15192, 'epoch': 3} {'type': 'loss', 'content': 0.10446669161319733, 'timestamp': '2025-10-01 04:37:59.705670', 'step': 15193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:59.765049', 'step': 15193, 'epoch': 3} {'type': 'loss', 'content': 0.07258718460798264, 'timestamp': '2025-10-01 04:37:59.768154', 'step': 15194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:37:59.828482', 'step': 15194, 'epoch': 3} {'type': 'loss', 'content': 0.09122500568628311, 'timestamp': '2025-10-01 04:37:59.830751', 'step': 15195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:37:59.891035', 'step': 15195, 'epoch': 3} {'type': 'loss', 'content': 0.057531002908945084, 'timestamp': '2025-10-01 04:37:59.898348', 'step': 15196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:37:59.958262', 'step': 15196, 'epoch': 3} {'type': 'loss', 'content': 0.11667758226394653, 'timestamp': '2025-10-01 04:37:59.960622', 'step': 15197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:00.022084', 'step': 15197, 'epoch': 3} {'type': 'loss', 'content': 0.10064960271120071, 'timestamp': '2025-10-01 04:38:00.025400', 'step': 15198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:00.087093', 'step': 15198, 'epoch': 3} {'type': 'loss', 'content': 0.11284367740154266, 'timestamp': '2025-10-01 04:38:00.089467', 'step': 15199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:00.149468', 'step': 15199, 'epoch': 3} {'type': 'loss', 'content': 0.1312074065208435, 'timestamp': '2025-10-01 04:38:00.158938', 'step': 15200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:00.219632', 'step': 15200, 'epoch': 3} {'type': 'loss', 'content': 0.026276418939232826, 'timestamp': '2025-10-01 04:38:00.222066', 'step': 15201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:00.284529', 'step': 15201, 'epoch': 3} {'type': 'loss', 'content': 0.07609464973211288, 'timestamp': '2025-10-01 04:38:00.287261', 'step': 15202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:38:00.349114', 'step': 15202, 'epoch': 3} {'type': 'loss', 'content': 0.06273392587900162, 'timestamp': '2025-10-01 04:38:00.351544', 'step': 15203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:38:00.410182', 'step': 15203, 'epoch': 3} {'type': 'loss', 'content': 0.042866818606853485, 'timestamp': '2025-10-01 04:38:00.430839', 'step': 15204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:38:00.486278', 'step': 15204, 'epoch': 3} {'type': 'loss', 'content': 0.09741413593292236, 'timestamp': '2025-10-01 04:38:00.488723', 'step': 15205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:00.543542', 'step': 15205, 'epoch': 3} {'type': 'loss', 'content': 0.22711609303951263, 'timestamp': '2025-10-01 04:38:00.546184', 'step': 15206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:00.601265', 'step': 15206, 'epoch': 3} {'type': 'loss', 'content': 0.2121991217136383, 'timestamp': '2025-10-01 04:38:00.625091', 'step': 15207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:00.678923', 'step': 15207, 'epoch': 3} {'type': 'loss', 'content': 0.22383621335029602, 'timestamp': '2025-10-01 04:38:00.685598', 'step': 15208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:00.753887', 'step': 15208, 'epoch': 3} {'type': 'loss', 'content': 0.19870604574680328, 'timestamp': '2025-10-01 04:38:00.756137', 'step': 15209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:38:00.809295', 'step': 15209, 'epoch': 3} {'type': 'loss', 'content': 0.08762840926647186, 'timestamp': '2025-10-01 04:38:00.812102', 'step': 15210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:38:00.865406', 'step': 15210, 'epoch': 3} {'type': 'loss', 'content': 0.05988041311502457, 'timestamp': '2025-10-01 04:38:00.868066', 'step': 15211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:00.922040', 'step': 15211, 'epoch': 3} {'type': 'loss', 'content': 0.18687590956687927, 'timestamp': '2025-10-01 04:38:00.928251', 'step': 15212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:38:00.982615', 'step': 15212, 'epoch': 3} {'type': 'loss', 'content': 0.17562517523765564, 'timestamp': '2025-10-01 04:38:00.984853', 'step': 15213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:01.038981', 'step': 15213, 'epoch': 3} {'type': 'loss', 'content': 0.15668688714504242, 'timestamp': '2025-10-01 04:38:01.041415', 'step': 15214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:01.095172', 'step': 15214, 'epoch': 3} {'type': 'loss', 'content': 0.11698432266712189, 'timestamp': '2025-10-01 04:38:01.097414', 'step': 15215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:01.157071', 'step': 15215, 'epoch': 3} {'type': 'loss', 'content': 0.04172421619296074, 'timestamp': '2025-10-01 04:38:01.163595', 'step': 15216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:01.217011', 'step': 15216, 'epoch': 3} {'type': 'loss', 'content': 0.1094261184334755, 'timestamp': '2025-10-01 04:38:01.219297', 'step': 15217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:01.274285', 'step': 15217, 'epoch': 3} {'type': 'loss', 'content': 0.1065140888094902, 'timestamp': '2025-10-01 04:38:01.286916', 'step': 15218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:01.340161', 'step': 15218, 'epoch': 3} {'type': 'loss', 'content': 0.1023322269320488, 'timestamp': '2025-10-01 04:38:01.342355', 'step': 15219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:01.395908', 'step': 15219, 'epoch': 3} {'type': 'loss', 'content': 0.14301025867462158, 'timestamp': '2025-10-01 04:38:01.401492', 'step': 15220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:01.454195', 'step': 15220, 'epoch': 3} {'type': 'loss', 'content': 0.15390734374523163, 'timestamp': '2025-10-01 04:38:01.456122', 'step': 15221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:01.509768', 'step': 15221, 'epoch': 3} {'type': 'loss', 'content': 0.14611949026584625, 'timestamp': '2025-10-01 04:38:01.511776', 'step': 15222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:01.565610', 'step': 15222, 'epoch': 3} {'type': 'loss', 'content': 0.0826115608215332, 'timestamp': '2025-10-01 04:38:01.568278', 'step': 15223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:01.621747', 'step': 15223, 'epoch': 3} {'type': 'loss', 'content': 0.13899268209934235, 'timestamp': '2025-10-01 04:38:01.627752', 'step': 15224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-01 04:38:01.695763', 'step': 15224, 'epoch': 3} {'type': 'loss', 'content': 0.06412194669246674, 'timestamp': '2025-10-01 04:38:01.709322', 'step': 15225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:01.762766', 'step': 15225, 'epoch': 3} {'type': 'loss', 'content': 0.18614178895950317, 'timestamp': '2025-10-01 04:38:01.765055', 'step': 15226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:01.819139', 'step': 15226, 'epoch': 3} {'type': 'loss', 'content': 0.11887510865926743, 'timestamp': '2025-10-01 04:38:01.821572', 'step': 15227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:01.874718', 'step': 15227, 'epoch': 3} {'type': 'loss', 'content': 0.07873192429542542, 'timestamp': '2025-10-01 04:38:01.880169', 'step': 15228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:01.939583', 'step': 15228, 'epoch': 3} {'type': 'loss', 'content': 0.10498365014791489, 'timestamp': '2025-10-01 04:38:01.941398', 'step': 15229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:38:02.007202', 'step': 15229, 'epoch': 3} {'type': 'loss', 'content': 0.12903225421905518, 'timestamp': '2025-10-01 04:38:02.009553', 'step': 15230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:02.063491', 'step': 15230, 'epoch': 3} {'type': 'loss', 'content': 0.14161115884780884, 'timestamp': '2025-10-01 04:38:02.065739', 'step': 15231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:02.119174', 'step': 15231, 'epoch': 3} {'type': 'loss', 'content': 0.06559564918279648, 'timestamp': '2025-10-01 04:38:02.125095', 'step': 15232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:02.180265', 'step': 15232, 'epoch': 3} {'type': 'loss', 'content': 0.06268080323934555, 'timestamp': '2025-10-01 04:38:02.182522', 'step': 15233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:02.237617', 'step': 15233, 'epoch': 3} {'type': 'loss', 'content': 0.12441720068454742, 'timestamp': '2025-10-01 04:38:02.239536', 'step': 15234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:02.293007', 'step': 15234, 'epoch': 3} {'type': 'loss', 'content': 0.044699057936668396, 'timestamp': '2025-10-01 04:38:02.294970', 'step': 15235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:02.349224', 'step': 15235, 'epoch': 3} {'type': 'loss', 'content': 0.07170580327510834, 'timestamp': '2025-10-01 04:38:02.354844', 'step': 15236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:02.407711', 'step': 15236, 'epoch': 3} {'type': 'loss', 'content': 0.09274572134017944, 'timestamp': '2025-10-01 04:38:02.410612', 'step': 15237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:02.464292', 'step': 15237, 'epoch': 3} {'type': 'loss', 'content': 0.07910530269145966, 'timestamp': '2025-10-01 04:38:02.466445', 'step': 15238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:02.520625', 'step': 15238, 'epoch': 3} {'type': 'loss', 'content': 0.09546765685081482, 'timestamp': '2025-10-01 04:38:02.525082', 'step': 15239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:02.578700', 'step': 15239, 'epoch': 3} {'type': 'loss', 'content': 0.10814020037651062, 'timestamp': '2025-10-01 04:38:02.584578', 'step': 15240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:02.648241', 'step': 15240, 'epoch': 3} {'type': 'loss', 'content': 0.22346378862857819, 'timestamp': '2025-10-01 04:38:02.651643', 'step': 15241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:02.706255', 'step': 15241, 'epoch': 3} {'type': 'loss', 'content': 0.12085793912410736, 'timestamp': '2025-10-01 04:38:02.708189', 'step': 15242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:02.761966', 'step': 15242, 'epoch': 3} {'type': 'loss', 'content': 0.05750666558742523, 'timestamp': '2025-10-01 04:38:02.764484', 'step': 15243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:02.818184', 'step': 15243, 'epoch': 3} {'type': 'loss', 'content': 0.06121540814638138, 'timestamp': '2025-10-01 04:38:02.823884', 'step': 15244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:38:02.877265', 'step': 15244, 'epoch': 3} {'type': 'loss', 'content': 0.08718197792768478, 'timestamp': '2025-10-01 04:38:02.879457', 'step': 15245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:02.933698', 'step': 15245, 'epoch': 3} {'type': 'loss', 'content': 0.2142678052186966, 'timestamp': '2025-10-01 04:38:02.936081', 'step': 15246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:02.989606', 'step': 15246, 'epoch': 3} {'type': 'loss', 'content': 0.0801803469657898, 'timestamp': '2025-10-01 04:38:02.991890', 'step': 15247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:03.045943', 'step': 15247, 'epoch': 3} {'type': 'loss', 'content': 0.12067537754774094, 'timestamp': '2025-10-01 04:38:03.051491', 'step': 15248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:03.105006', 'step': 15248, 'epoch': 3} {'type': 'loss', 'content': 0.1130061149597168, 'timestamp': '2025-10-01 04:38:03.107109', 'step': 15249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:03.161140', 'step': 15249, 'epoch': 3} {'type': 'loss', 'content': 0.06476388871669769, 'timestamp': '2025-10-01 04:38:03.163665', 'step': 15250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:03.217787', 'step': 15250, 'epoch': 3} {'type': 'loss', 'content': 0.15775947272777557, 'timestamp': '2025-10-01 04:38:03.220047', 'step': 15251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:03.273013', 'step': 15251, 'epoch': 3} {'type': 'loss', 'content': 0.11881249397993088, 'timestamp': '2025-10-01 04:38:03.278840', 'step': 15252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:03.332057', 'step': 15252, 'epoch': 3} {'type': 'loss', 'content': 0.06449535489082336, 'timestamp': '2025-10-01 04:38:03.337428', 'step': 15253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:03.393452', 'step': 15253, 'epoch': 3} {'type': 'loss', 'content': 0.1382746547460556, 'timestamp': '2025-10-01 04:38:03.395797', 'step': 15254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:03.449482', 'step': 15254, 'epoch': 3} {'type': 'loss', 'content': 0.04862360283732414, 'timestamp': '2025-10-01 04:38:03.451616', 'step': 15255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:03.505558', 'step': 15255, 'epoch': 3} {'type': 'loss', 'content': 0.09779078513383865, 'timestamp': '2025-10-01 04:38:03.515768', 'step': 15256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:03.573183', 'step': 15256, 'epoch': 3} {'type': 'loss', 'content': 0.1593540608882904, 'timestamp': '2025-10-01 04:38:03.575219', 'step': 15257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:03.629480', 'step': 15257, 'epoch': 3} {'type': 'loss', 'content': 0.07345487922430038, 'timestamp': '2025-10-01 04:38:03.631932', 'step': 15258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:03.685844', 'step': 15258, 'epoch': 3} {'type': 'loss', 'content': 0.08765774220228195, 'timestamp': '2025-10-01 04:38:03.688124', 'step': 15259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:03.746079', 'step': 15259, 'epoch': 3} {'type': 'loss', 'content': 0.09920935332775116, 'timestamp': '2025-10-01 04:38:03.752398', 'step': 15260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:03.816650', 'step': 15260, 'epoch': 3} {'type': 'loss', 'content': 0.09782331436872482, 'timestamp': '2025-10-01 04:38:03.819441', 'step': 15261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:03.874223', 'step': 15261, 'epoch': 3} {'type': 'loss', 'content': 0.11950846761465073, 'timestamp': '2025-10-01 04:38:03.878886', 'step': 15262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:03.932879', 'step': 15262, 'epoch': 3} {'type': 'loss', 'content': 0.11289186030626297, 'timestamp': '2025-10-01 04:38:03.936260', 'step': 15263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:03.994965', 'step': 15263, 'epoch': 3} {'type': 'loss', 'content': 0.10028126090765, 'timestamp': '2025-10-01 04:38:04.000598', 'step': 15264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:04.053898', 'step': 15264, 'epoch': 3} {'type': 'loss', 'content': 0.13280756771564484, 'timestamp': '2025-10-01 04:38:04.064731', 'step': 15265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:04.123754', 'step': 15265, 'epoch': 3} {'type': 'loss', 'content': 0.13226555287837982, 'timestamp': '2025-10-01 04:38:04.127797', 'step': 15266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:04.187402', 'step': 15266, 'epoch': 3} {'type': 'loss', 'content': 0.21186919510364532, 'timestamp': '2025-10-01 04:38:04.189600', 'step': 15267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:04.259851', 'step': 15267, 'epoch': 3} {'type': 'loss', 'content': 0.07033325731754303, 'timestamp': '2025-10-01 04:38:04.272203', 'step': 15268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:04.327294', 'step': 15268, 'epoch': 3} {'type': 'loss', 'content': 0.10583221912384033, 'timestamp': '2025-10-01 04:38:04.329654', 'step': 15269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:04.385603', 'step': 15269, 'epoch': 3} {'type': 'loss', 'content': 0.06321396678686142, 'timestamp': '2025-10-01 04:38:04.387458', 'step': 15270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:04.443377', 'step': 15270, 'epoch': 3} {'type': 'loss', 'content': 0.10989364981651306, 'timestamp': '2025-10-01 04:38:04.445580', 'step': 15271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:04.498904', 'step': 15271, 'epoch': 3} {'type': 'loss', 'content': 0.10289767384529114, 'timestamp': '2025-10-01 04:38:04.504803', 'step': 15272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:04.561158', 'step': 15272, 'epoch': 3} {'type': 'loss', 'content': 0.06399384140968323, 'timestamp': '2025-10-01 04:38:04.563354', 'step': 15273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:04.617717', 'step': 15273, 'epoch': 3} {'type': 'loss', 'content': 0.06009909510612488, 'timestamp': '2025-10-01 04:38:04.619996', 'step': 15274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:04.673622', 'step': 15274, 'epoch': 3} {'type': 'loss', 'content': 0.0326271615922451, 'timestamp': '2025-10-01 04:38:04.676065', 'step': 15275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:04.729363', 'step': 15275, 'epoch': 3} {'type': 'loss', 'content': 0.22909893095493317, 'timestamp': '2025-10-01 04:38:04.746121', 'step': 15276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:04.799189', 'step': 15276, 'epoch': 3} {'type': 'loss', 'content': 0.12652543187141418, 'timestamp': '2025-10-01 04:38:04.800935', 'step': 15277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:04.862449', 'step': 15277, 'epoch': 3} {'type': 'loss', 'content': 0.06214440241456032, 'timestamp': '2025-10-01 04:38:04.865012', 'step': 15278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:04.922495', 'step': 15278, 'epoch': 3} {'type': 'loss', 'content': 0.07245802134275436, 'timestamp': '2025-10-01 04:38:04.924906', 'step': 15279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:04.978879', 'step': 15279, 'epoch': 3} {'type': 'loss', 'content': 0.16211381554603577, 'timestamp': '2025-10-01 04:38:04.984913', 'step': 15280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:05.038169', 'step': 15280, 'epoch': 3} {'type': 'loss', 'content': 0.05665459483861923, 'timestamp': '2025-10-01 04:38:05.040419', 'step': 15281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:05.094246', 'step': 15281, 'epoch': 3} {'type': 'loss', 'content': 0.08925499022006989, 'timestamp': '2025-10-01 04:38:05.096459', 'step': 15282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:05.157245', 'step': 15282, 'epoch': 3} {'type': 'loss', 'content': 0.040473390370607376, 'timestamp': '2025-10-01 04:38:05.160030', 'step': 15283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:05.214194', 'step': 15283, 'epoch': 3} {'type': 'loss', 'content': 0.08848138153553009, 'timestamp': '2025-10-01 04:38:05.220170', 'step': 15284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:38:05.274161', 'step': 15284, 'epoch': 3} {'type': 'loss', 'content': 0.1615288406610489, 'timestamp': '2025-10-01 04:38:05.276405', 'step': 15285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:05.336987', 'step': 15285, 'epoch': 3} {'type': 'loss', 'content': 0.0922425389289856, 'timestamp': '2025-10-01 04:38:05.339543', 'step': 15286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:05.394606', 'step': 15286, 'epoch': 3} {'type': 'loss', 'content': 0.2610211670398712, 'timestamp': '2025-10-01 04:38:05.397608', 'step': 15287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:05.457739', 'step': 15287, 'epoch': 3} {'type': 'loss', 'content': 0.08275318890810013, 'timestamp': '2025-10-01 04:38:05.464715', 'step': 15288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:05.520111', 'step': 15288, 'epoch': 3} {'type': 'loss', 'content': 0.13408350944519043, 'timestamp': '2025-10-01 04:38:05.522771', 'step': 15289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:05.576590', 'step': 15289, 'epoch': 3} {'type': 'loss', 'content': 0.04252681881189346, 'timestamp': '2025-10-01 04:38:05.579070', 'step': 15290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:05.633834', 'step': 15290, 'epoch': 3} {'type': 'loss', 'content': 0.09336323291063309, 'timestamp': '2025-10-01 04:38:05.636557', 'step': 15291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:05.690924', 'step': 15291, 'epoch': 3} {'type': 'loss', 'content': 0.17488327622413635, 'timestamp': '2025-10-01 04:38:05.696890', 'step': 15292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:38:05.750979', 'step': 15292, 'epoch': 3} {'type': 'loss', 'content': 0.08434013277292252, 'timestamp': '2025-10-01 04:38:05.753505', 'step': 15293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:05.808313', 'step': 15293, 'epoch': 3} {'type': 'loss', 'content': 0.10303261876106262, 'timestamp': '2025-10-01 04:38:05.811157', 'step': 15294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:05.865010', 'step': 15294, 'epoch': 3} {'type': 'loss', 'content': 0.1134558618068695, 'timestamp': '2025-10-01 04:38:05.867483', 'step': 15295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:05.921893', 'step': 15295, 'epoch': 3} {'type': 'loss', 'content': 0.0664370208978653, 'timestamp': '2025-10-01 04:38:05.928292', 'step': 15296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:05.982074', 'step': 15296, 'epoch': 3} {'type': 'loss', 'content': 0.14043356478214264, 'timestamp': '2025-10-01 04:38:05.984887', 'step': 15297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:06.039013', 'step': 15297, 'epoch': 3} {'type': 'loss', 'content': 0.07032283395528793, 'timestamp': '2025-10-01 04:38:06.041775', 'step': 15298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:06.097024', 'step': 15298, 'epoch': 3} {'type': 'loss', 'content': 0.11548622697591782, 'timestamp': '2025-10-01 04:38:06.099545', 'step': 15299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:06.154115', 'step': 15299, 'epoch': 3} {'type': 'loss', 'content': 0.19675995409488678, 'timestamp': '2025-10-01 04:38:06.160156', 'step': 15300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:06.214587', 'step': 15300, 'epoch': 3} {'type': 'loss', 'content': 0.14757978916168213, 'timestamp': '2025-10-01 04:38:06.217256', 'step': 15301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:06.274133', 'step': 15301, 'epoch': 3} {'type': 'loss', 'content': 0.2159956842660904, 'timestamp': '2025-10-01 04:38:06.276815', 'step': 15302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:06.332864', 'step': 15302, 'epoch': 3} {'type': 'loss', 'content': 0.05474821478128433, 'timestamp': '2025-10-01 04:38:06.335123', 'step': 15303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:06.388895', 'step': 15303, 'epoch': 3} {'type': 'loss', 'content': 0.09920204430818558, 'timestamp': '2025-10-01 04:38:06.395186', 'step': 15304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:06.448583', 'step': 15304, 'epoch': 3} {'type': 'loss', 'content': 0.11230431497097015, 'timestamp': '2025-10-01 04:38:06.451021', 'step': 15305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:06.505601', 'step': 15305, 'epoch': 3} {'type': 'loss', 'content': 0.05940953642129898, 'timestamp': '2025-10-01 04:38:06.507910', 'step': 15306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:06.562680', 'step': 15306, 'epoch': 3} {'type': 'loss', 'content': 0.041112273931503296, 'timestamp': '2025-10-01 04:38:06.565589', 'step': 15307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:38:06.620580', 'step': 15307, 'epoch': 3} {'type': 'loss', 'content': 0.1656993329524994, 'timestamp': '2025-10-01 04:38:06.627015', 'step': 15308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:06.681140', 'step': 15308, 'epoch': 3} {'type': 'loss', 'content': 0.12127401679754257, 'timestamp': '2025-10-01 04:38:06.683473', 'step': 15309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:38:06.737974', 'step': 15309, 'epoch': 3} {'type': 'loss', 'content': 0.08107129484415054, 'timestamp': '2025-10-01 04:38:06.741003', 'step': 15310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:06.795680', 'step': 15310, 'epoch': 3} {'type': 'loss', 'content': 0.11932411044836044, 'timestamp': '2025-10-01 04:38:06.797992', 'step': 15311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:06.852204', 'step': 15311, 'epoch': 3} {'type': 'loss', 'content': 0.11589379608631134, 'timestamp': '2025-10-01 04:38:06.858943', 'step': 15312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:06.913469', 'step': 15312, 'epoch': 3} {'type': 'loss', 'content': 0.09008532762527466, 'timestamp': '2025-10-01 04:38:06.917442', 'step': 15313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:06.971493', 'step': 15313, 'epoch': 3} {'type': 'loss', 'content': 0.07382144033908844, 'timestamp': '2025-10-01 04:38:06.973751', 'step': 15314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:07.027483', 'step': 15314, 'epoch': 3} {'type': 'loss', 'content': 0.1825307011604309, 'timestamp': '2025-10-01 04:38:07.029953', 'step': 15315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:07.083166', 'step': 15315, 'epoch': 3} {'type': 'loss', 'content': 0.10794752836227417, 'timestamp': '2025-10-01 04:38:07.089619', 'step': 15316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:07.142214', 'step': 15316, 'epoch': 3} {'type': 'loss', 'content': 0.12469767034053802, 'timestamp': '2025-10-01 04:38:07.144571', 'step': 15317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:07.198905', 'step': 15317, 'epoch': 3} {'type': 'loss', 'content': 0.22907088696956635, 'timestamp': '2025-10-01 04:38:07.201158', 'step': 15318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:07.254551', 'step': 15318, 'epoch': 3} {'type': 'loss', 'content': 0.10253136605024338, 'timestamp': '2025-10-01 04:38:07.257036', 'step': 15319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:07.310320', 'step': 15319, 'epoch': 3} {'type': 'loss', 'content': 0.12138383090496063, 'timestamp': '2025-10-01 04:38:07.317322', 'step': 15320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:38:07.370021', 'step': 15320, 'epoch': 3} {'type': 'loss', 'content': 0.14155325293540955, 'timestamp': '2025-10-01 04:38:07.372325', 'step': 15321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:07.426072', 'step': 15321, 'epoch': 3} {'type': 'loss', 'content': 0.10532642900943756, 'timestamp': '2025-10-01 04:38:07.428585', 'step': 15322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:07.482413', 'step': 15322, 'epoch': 3} {'type': 'loss', 'content': 0.12085878103971481, 'timestamp': '2025-10-01 04:38:07.485143', 'step': 15323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:07.539129', 'step': 15323, 'epoch': 3} {'type': 'loss', 'content': 0.05331648513674736, 'timestamp': '2025-10-01 04:38:07.545484', 'step': 15324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:07.598497', 'step': 15324, 'epoch': 3} {'type': 'loss', 'content': 0.13409286737442017, 'timestamp': '2025-10-01 04:38:07.600743', 'step': 15325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:07.654470', 'step': 15325, 'epoch': 3} {'type': 'loss', 'content': 0.12448553740978241, 'timestamp': '2025-10-01 04:38:07.656805', 'step': 15326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:07.712391', 'step': 15326, 'epoch': 3} {'type': 'loss', 'content': 0.04444834962487221, 'timestamp': '2025-10-01 04:38:07.714663', 'step': 15327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:38:07.768662', 'step': 15327, 'epoch': 3} {'type': 'loss', 'content': 0.08173017203807831, 'timestamp': '2025-10-01 04:38:07.774821', 'step': 15328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:38:07.828028', 'step': 15328, 'epoch': 3} {'type': 'loss', 'content': 0.10413963347673416, 'timestamp': '2025-10-01 04:38:07.831192', 'step': 15329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:07.885575', 'step': 15329, 'epoch': 3} {'type': 'loss', 'content': 0.1129000186920166, 'timestamp': '2025-10-01 04:38:07.887777', 'step': 15330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:07.941602', 'step': 15330, 'epoch': 3} {'type': 'loss', 'content': 0.18012283742427826, 'timestamp': '2025-10-01 04:38:07.943763', 'step': 15331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:07.997295', 'step': 15331, 'epoch': 3} {'type': 'loss', 'content': 0.12524458765983582, 'timestamp': '2025-10-01 04:38:08.004280', 'step': 15332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:08.057050', 'step': 15332, 'epoch': 3} {'type': 'loss', 'content': 0.1042478159070015, 'timestamp': '2025-10-01 04:38:08.059271', 'step': 15333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:08.115552', 'step': 15333, 'epoch': 3} {'type': 'loss', 'content': 0.06537877768278122, 'timestamp': '2025-10-01 04:38:08.117826', 'step': 15334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:08.171291', 'step': 15334, 'epoch': 3} {'type': 'loss', 'content': 0.05738235265016556, 'timestamp': '2025-10-01 04:38:08.173585', 'step': 15335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:08.227491', 'step': 15335, 'epoch': 3} {'type': 'loss', 'content': 0.05551810562610626, 'timestamp': '2025-10-01 04:38:08.233876', 'step': 15336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:08.288441', 'step': 15336, 'epoch': 3} {'type': 'loss', 'content': 0.0634477436542511, 'timestamp': '2025-10-01 04:38:08.293288', 'step': 15337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:08.350115', 'step': 15337, 'epoch': 3} {'type': 'loss', 'content': 0.07851063460111618, 'timestamp': '2025-10-01 04:38:08.352347', 'step': 15338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:08.406041', 'step': 15338, 'epoch': 3} {'type': 'loss', 'content': 0.06702155619859695, 'timestamp': '2025-10-01 04:38:08.408770', 'step': 15339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:08.469748', 'step': 15339, 'epoch': 3} {'type': 'loss', 'content': 0.11125215888023376, 'timestamp': '2025-10-01 04:38:08.477036', 'step': 15340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:08.529935', 'step': 15340, 'epoch': 3} {'type': 'loss', 'content': 0.06653033196926117, 'timestamp': '2025-10-01 04:38:08.532193', 'step': 15341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:08.585625', 'step': 15341, 'epoch': 3} {'type': 'loss', 'content': 0.15142546594142914, 'timestamp': '2025-10-01 04:38:08.587847', 'step': 15342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:08.641402', 'step': 15342, 'epoch': 3} {'type': 'loss', 'content': 0.09357263147830963, 'timestamp': '2025-10-01 04:38:08.643743', 'step': 15343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:08.699167', 'step': 15343, 'epoch': 3} {'type': 'loss', 'content': 0.09665554761886597, 'timestamp': '2025-10-01 04:38:08.705186', 'step': 15344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:08.758815', 'step': 15344, 'epoch': 3} {'type': 'loss', 'content': 0.054830241948366165, 'timestamp': '2025-10-01 04:38:08.761887', 'step': 15345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:08.816891', 'step': 15345, 'epoch': 3} {'type': 'loss', 'content': 0.10749070346355438, 'timestamp': '2025-10-01 04:38:08.819545', 'step': 15346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:08.876608', 'step': 15346, 'epoch': 3} {'type': 'loss', 'content': 0.13941505551338196, 'timestamp': '2025-10-01 04:38:08.879240', 'step': 15347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:08.933483', 'step': 15347, 'epoch': 3} {'type': 'loss', 'content': 0.1912323385477066, 'timestamp': '2025-10-01 04:38:08.939498', 'step': 15348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:08.992853', 'step': 15348, 'epoch': 3} {'type': 'loss', 'content': 0.09274185448884964, 'timestamp': '2025-10-01 04:38:08.995174', 'step': 15349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:09.048591', 'step': 15349, 'epoch': 3} {'type': 'loss', 'content': 0.004971798043698072, 'timestamp': '2025-10-01 04:38:09.050902', 'step': 15350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:38:09.105080', 'step': 15350, 'epoch': 3} {'type': 'loss', 'content': 0.07678980380296707, 'timestamp': '2025-10-01 04:38:09.107892', 'step': 15351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:09.161442', 'step': 15351, 'epoch': 3} {'type': 'loss', 'content': 0.08625968545675278, 'timestamp': '2025-10-01 04:38:09.167375', 'step': 15352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:09.221037', 'step': 15352, 'epoch': 3} {'type': 'loss', 'content': 0.05233941227197647, 'timestamp': '2025-10-01 04:38:09.223204', 'step': 15353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:09.277098', 'step': 15353, 'epoch': 3} {'type': 'loss', 'content': 0.06123063340783119, 'timestamp': '2025-10-01 04:38:09.279519', 'step': 15354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:09.335217', 'step': 15354, 'epoch': 3} {'type': 'loss', 'content': 0.14979460835456848, 'timestamp': '2025-10-01 04:38:09.337313', 'step': 15355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:09.391122', 'step': 15355, 'epoch': 3} {'type': 'loss', 'content': 0.05643251910805702, 'timestamp': '2025-10-01 04:38:09.397158', 'step': 15356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:09.453618', 'step': 15356, 'epoch': 3} {'type': 'loss', 'content': 0.14604607224464417, 'timestamp': '2025-10-01 04:38:09.455825', 'step': 15357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:09.510967', 'step': 15357, 'epoch': 3} {'type': 'loss', 'content': 0.1484757363796234, 'timestamp': '2025-10-01 04:38:09.513244', 'step': 15358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:09.567086', 'step': 15358, 'epoch': 3} {'type': 'loss', 'content': 0.05674319341778755, 'timestamp': '2025-10-01 04:38:09.569954', 'step': 15359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:09.624479', 'step': 15359, 'epoch': 3} {'type': 'loss', 'content': 0.14798353612422943, 'timestamp': '2025-10-01 04:38:09.630422', 'step': 15360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:09.683951', 'step': 15360, 'epoch': 3} {'type': 'loss', 'content': 0.06621002405881882, 'timestamp': '2025-10-01 04:38:09.686113', 'step': 15361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:09.743175', 'step': 15361, 'epoch': 3} {'type': 'loss', 'content': 0.04503007233142853, 'timestamp': '2025-10-01 04:38:09.745400', 'step': 15362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:09.799119', 'step': 15362, 'epoch': 3} {'type': 'loss', 'content': 0.11361571401357651, 'timestamp': '2025-10-01 04:38:09.801935', 'step': 15363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:09.856471', 'step': 15363, 'epoch': 3} {'type': 'loss', 'content': 0.14423884451389313, 'timestamp': '2025-10-01 04:38:09.862281', 'step': 15364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:09.916297', 'step': 15364, 'epoch': 3} {'type': 'loss', 'content': 0.1419086754322052, 'timestamp': '2025-10-01 04:38:09.918594', 'step': 15365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:09.971800', 'step': 15365, 'epoch': 3} {'type': 'loss', 'content': 0.15521791577339172, 'timestamp': '2025-10-01 04:38:09.974262', 'step': 15366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:38:10.027778', 'step': 15366, 'epoch': 3} {'type': 'loss', 'content': 0.1277618706226349, 'timestamp': '2025-10-01 04:38:10.030105', 'step': 15367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:38:10.083654', 'step': 15367, 'epoch': 3} {'type': 'loss', 'content': 0.10057882964611053, 'timestamp': '2025-10-01 04:38:10.089565', 'step': 15368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:10.143262', 'step': 15368, 'epoch': 3} {'type': 'loss', 'content': 0.13069473206996918, 'timestamp': '2025-10-01 04:38:10.145475', 'step': 15369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:10.198726', 'step': 15369, 'epoch': 3} {'type': 'loss', 'content': 0.08420184254646301, 'timestamp': '2025-10-01 04:38:10.201005', 'step': 15370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:10.263949', 'step': 15370, 'epoch': 3} {'type': 'loss', 'content': 0.11797814816236496, 'timestamp': '2025-10-01 04:38:10.266271', 'step': 15371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:10.319848', 'step': 15371, 'epoch': 3} {'type': 'loss', 'content': 0.07650496065616608, 'timestamp': '2025-10-01 04:38:10.325644', 'step': 15372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:10.378950', 'step': 15372, 'epoch': 3} {'type': 'loss', 'content': 0.11968206614255905, 'timestamp': '2025-10-01 04:38:10.382543', 'step': 15373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:38:10.444262', 'step': 15373, 'epoch': 3} {'type': 'loss', 'content': 0.15021349489688873, 'timestamp': '2025-10-01 04:38:10.446809', 'step': 15374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:10.500557', 'step': 15374, 'epoch': 3} {'type': 'loss', 'content': 0.14287208020687103, 'timestamp': '2025-10-01 04:38:10.503598', 'step': 15375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:10.557556', 'step': 15375, 'epoch': 3} {'type': 'loss', 'content': 0.07625473290681839, 'timestamp': '2025-10-01 04:38:10.563329', 'step': 15376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:38:10.616504', 'step': 15376, 'epoch': 3} {'type': 'loss', 'content': 0.10625497996807098, 'timestamp': '2025-10-01 04:38:10.618949', 'step': 15377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:10.672951', 'step': 15377, 'epoch': 3} {'type': 'loss', 'content': 0.06820505857467651, 'timestamp': '2025-10-01 04:38:10.675139', 'step': 15378, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-01 04:38:23.847834', 'step': 15378, 'epoch': 3} {'type': 'pplx', 'content': 10133.450085628661, 'timestamp': '2025-10-01 04:38:23.851043', 'step': 15378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:23.906341', 'step': 15378, 'epoch': 3} {'type': 'loss', 'content': 0.1036519780755043, 'timestamp': '2025-10-01 04:38:23.908826', 'step': 15379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:23.966656', 'step': 15379, 'epoch': 3} {'type': 'loss', 'content': 0.18455302715301514, 'timestamp': '2025-10-01 04:38:23.973199', 'step': 15380, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:24.028786', 'step': 15380, 'epoch': 3} {'type': 'loss', 'content': 0.097807876765728, 'timestamp': '2025-10-01 04:38:24.031117', 'step': 15381, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:24.087665', 'step': 15381, 'epoch': 3} {'type': 'loss', 'content': 0.16121476888656616, 'timestamp': '2025-10-01 04:38:24.089936', 'step': 15382, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:24.146112', 'step': 15382, 'epoch': 3} {'type': 'loss', 'content': 0.04099372774362564, 'timestamp': '2025-10-01 04:38:24.148379', 'step': 15383, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:24.204099', 'step': 15383, 'epoch': 3} {'type': 'loss', 'content': 0.13347889482975006, 'timestamp': '2025-10-01 04:38:24.210687', 'step': 15384, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:24.265712', 'step': 15384, 'epoch': 3} {'type': 'loss', 'content': 0.07849173247814178, 'timestamp': '2025-10-01 04:38:24.268078', 'step': 15385, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:24.323481', 'step': 15385, 'epoch': 3} {'type': 'loss', 'content': 0.08392837643623352, 'timestamp': '2025-10-01 04:38:24.325837', 'step': 15386, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:24.381015', 'step': 15386, 'epoch': 3} {'type': 'loss', 'content': 0.05415426194667816, 'timestamp': '2025-10-01 04:38:24.383699', 'step': 15387, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:24.440630', 'step': 15387, 'epoch': 3} {'type': 'loss', 'content': 0.13122902810573578, 'timestamp': '2025-10-01 04:38:24.448571', 'step': 15388, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:24.504288', 'step': 15388, 'epoch': 3} {'type': 'loss', 'content': 0.14415611326694489, 'timestamp': '2025-10-01 04:38:24.506964', 'step': 15389, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:24.561863', 'step': 15389, 'epoch': 3} {'type': 'loss', 'content': 0.05131574347615242, 'timestamp': '2025-10-01 04:38:24.564246', 'step': 15390, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:24.618918', 'step': 15390, 'epoch': 3} {'type': 'loss', 'content': 0.19493645429611206, 'timestamp': '2025-10-01 04:38:24.622164', 'step': 15391, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:24.677731', 'step': 15391, 'epoch': 3} {'type': 'loss', 'content': 0.09763060510158539, 'timestamp': '2025-10-01 04:38:24.685644', 'step': 15392, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:24.745596', 'step': 15392, 'epoch': 3} {'type': 'loss', 'content': 0.10584099590778351, 'timestamp': '2025-10-01 04:38:24.747858', 'step': 15393, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:24.811296', 'step': 15393, 'epoch': 3} {'type': 'loss', 'content': 0.04120960459113121, 'timestamp': '2025-10-01 04:38:24.814834', 'step': 15394, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:24.872815', 'step': 15394, 'epoch': 3} {'type': 'loss', 'content': 0.16893872618675232, 'timestamp': '2025-10-01 04:38:24.875807', 'step': 15395, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:24.931896', 'step': 15395, 'epoch': 3} {'type': 'loss', 'content': 0.05621923506259918, 'timestamp': '2025-10-01 04:38:24.938885', 'step': 15396, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:24.991985', 'step': 15396, 'epoch': 3} {'type': 'loss', 'content': 0.1078726127743721, 'timestamp': '2025-10-01 04:38:24.994142', 'step': 15397, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:25.048241', 'step': 15397, 'epoch': 3} {'type': 'loss', 'content': 0.09328296035528183, 'timestamp': '2025-10-01 04:38:25.050453', 'step': 15398, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:38:25.104829', 'step': 15398, 'epoch': 3} {'type': 'loss', 'content': 0.0988871157169342, 'timestamp': '2025-10-01 04:38:25.107245', 'step': 15399, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:25.160792', 'step': 15399, 'epoch': 3} {'type': 'loss', 'content': 0.23640666902065277, 'timestamp': '2025-10-01 04:38:25.167017', 'step': 15400, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:38:25.222116', 'step': 15400, 'epoch': 3} {'type': 'loss', 'content': 0.16669690608978271, 'timestamp': '2025-10-01 04:38:25.224396', 'step': 15401, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:25.277991', 'step': 15401, 'epoch': 3} {'type': 'loss', 'content': 0.12174338102340698, 'timestamp': '2025-10-01 04:38:25.280408', 'step': 15402, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:38:25.336282', 'step': 15402, 'epoch': 3} {'type': 'loss', 'content': 0.07129621505737305, 'timestamp': '2025-10-01 04:38:25.339625', 'step': 15403, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:25.393685', 'step': 15403, 'epoch': 3} {'type': 'loss', 'content': 0.13327902555465698, 'timestamp': '2025-10-01 04:38:25.399623', 'step': 15404, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:25.453132', 'step': 15404, 'epoch': 3} {'type': 'loss', 'content': 0.08246598392724991, 'timestamp': '2025-10-01 04:38:25.461650', 'step': 15405, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:25.514952', 'step': 15405, 'epoch': 3} {'type': 'loss', 'content': 0.051821447908878326, 'timestamp': '2025-10-01 04:38:25.518481', 'step': 15406, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:25.573823', 'step': 15406, 'epoch': 3} {'type': 'loss', 'content': 0.023974625393748283, 'timestamp': '2025-10-01 04:38:25.575914', 'step': 15407, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:25.629491', 'step': 15407, 'epoch': 3} {'type': 'loss', 'content': 0.14517375826835632, 'timestamp': '2025-10-01 04:38:25.635383', 'step': 15408, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:25.689534', 'step': 15408, 'epoch': 3} {'type': 'loss', 'content': 0.11763189733028412, 'timestamp': '2025-10-01 04:38:25.691810', 'step': 15409, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:25.745962', 'step': 15409, 'epoch': 3} {'type': 'loss', 'content': 0.12372113764286041, 'timestamp': '2025-10-01 04:38:25.748469', 'step': 15410, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:25.808157', 'step': 15410, 'epoch': 3} {'type': 'loss', 'content': 0.14256717264652252, 'timestamp': '2025-10-01 04:38:25.810440', 'step': 15411, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:25.863782', 'step': 15411, 'epoch': 3} {'type': 'loss', 'content': 0.08972909301519394, 'timestamp': '2025-10-01 04:38:25.869543', 'step': 15412, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:25.923371', 'step': 15412, 'epoch': 3} {'type': 'loss', 'content': 0.04996196925640106, 'timestamp': '2025-10-01 04:38:25.925777', 'step': 15413, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:25.981653', 'step': 15413, 'epoch': 3} {'type': 'loss', 'content': 0.1393914371728897, 'timestamp': '2025-10-01 04:38:25.992504', 'step': 15414, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:26.061048', 'step': 15414, 'epoch': 3} {'type': 'loss', 'content': 0.13023430109024048, 'timestamp': '2025-10-01 04:38:26.063284', 'step': 15415, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:38:26.116452', 'step': 15415, 'epoch': 3} {'type': 'loss', 'content': 0.18210643529891968, 'timestamp': '2025-10-01 04:38:26.122328', 'step': 15416, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:38:26.175588', 'step': 15416, 'epoch': 3} {'type': 'loss', 'content': 0.09581400454044342, 'timestamp': '2025-10-01 04:38:26.178151', 'step': 15417, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:26.237838', 'step': 15417, 'epoch': 3} {'type': 'loss', 'content': 0.12185899913311005, 'timestamp': '2025-10-01 04:38:26.241281', 'step': 15418, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:26.294331', 'step': 15418, 'epoch': 3} {'type': 'loss', 'content': 0.049257438629865646, 'timestamp': '2025-10-01 04:38:26.296628', 'step': 15419, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:26.350685', 'step': 15419, 'epoch': 3} {'type': 'loss', 'content': 0.03758304938673973, 'timestamp': '2025-10-01 04:38:26.356397', 'step': 15420, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:26.409670', 'step': 15420, 'epoch': 3} {'type': 'loss', 'content': 0.05057627707719803, 'timestamp': '2025-10-01 04:38:26.424262', 'step': 15421, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:26.479710', 'step': 15421, 'epoch': 3} {'type': 'loss', 'content': 0.1352524310350418, 'timestamp': '2025-10-01 04:38:26.481958', 'step': 15422, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:26.535750', 'step': 15422, 'epoch': 3} {'type': 'loss', 'content': 0.08504806458950043, 'timestamp': '2025-10-01 04:38:26.537987', 'step': 15423, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:26.591251', 'step': 15423, 'epoch': 3} {'type': 'loss', 'content': 0.06407331675291061, 'timestamp': '2025-10-01 04:38:26.597165', 'step': 15424, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:26.650446', 'step': 15424, 'epoch': 3} {'type': 'loss', 'content': 0.0757800042629242, 'timestamp': '2025-10-01 04:38:26.652683', 'step': 15425, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:26.707262', 'step': 15425, 'epoch': 3} {'type': 'loss', 'content': 0.11024108529090881, 'timestamp': '2025-10-01 04:38:26.709637', 'step': 15426, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:26.763395', 'step': 15426, 'epoch': 3} {'type': 'loss', 'content': 0.1488063484430313, 'timestamp': '2025-10-01 04:38:26.765598', 'step': 15427, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:26.819118', 'step': 15427, 'epoch': 3} {'type': 'loss', 'content': 0.06164136156439781, 'timestamp': '2025-10-01 04:38:26.825106', 'step': 15428, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:26.877718', 'step': 15428, 'epoch': 3} {'type': 'loss', 'content': 0.08210522681474686, 'timestamp': '2025-10-01 04:38:26.879800', 'step': 15429, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:26.933527', 'step': 15429, 'epoch': 3} {'type': 'loss', 'content': 0.040531981736421585, 'timestamp': '2025-10-01 04:38:26.936133', 'step': 15430, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:38:26.989606', 'step': 15430, 'epoch': 3} {'type': 'loss', 'content': 0.03495031222701073, 'timestamp': '2025-10-01 04:38:26.992004', 'step': 15431, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:27.046604', 'step': 15431, 'epoch': 3} {'type': 'loss', 'content': 0.06395823508501053, 'timestamp': '2025-10-01 04:38:27.052555', 'step': 15432, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:27.105373', 'step': 15432, 'epoch': 3} {'type': 'loss', 'content': 0.16761904954910278, 'timestamp': '2025-10-01 04:38:27.107566', 'step': 15433, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:27.163880', 'step': 15433, 'epoch': 3} {'type': 'loss', 'content': 0.1299704611301422, 'timestamp': '2025-10-01 04:38:27.166086', 'step': 15434, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:27.219321', 'step': 15434, 'epoch': 3} {'type': 'loss', 'content': 0.0775071457028389, 'timestamp': '2025-10-01 04:38:27.231513', 'step': 15435, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:27.288869', 'step': 15435, 'epoch': 3} {'type': 'loss', 'content': 0.06636914610862732, 'timestamp': '2025-10-01 04:38:27.294775', 'step': 15436, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:27.347972', 'step': 15436, 'epoch': 3} {'type': 'loss', 'content': 0.11986377090215683, 'timestamp': '2025-10-01 04:38:27.350175', 'step': 15437, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:27.403879', 'step': 15437, 'epoch': 3} {'type': 'loss', 'content': 0.07916449755430222, 'timestamp': '2025-10-01 04:38:27.406134', 'step': 15438, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:27.459899', 'step': 15438, 'epoch': 3} {'type': 'loss', 'content': 0.1515544354915619, 'timestamp': '2025-10-01 04:38:27.462088', 'step': 15439, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:27.515814', 'step': 15439, 'epoch': 3} {'type': 'loss', 'content': 0.0812813937664032, 'timestamp': '2025-10-01 04:38:27.521643', 'step': 15440, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:38:27.575054', 'step': 15440, 'epoch': 3} {'type': 'loss', 'content': 0.06919845193624496, 'timestamp': '2025-10-01 04:38:27.577279', 'step': 15441, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:27.631008', 'step': 15441, 'epoch': 3} {'type': 'loss', 'content': 0.23390598595142365, 'timestamp': '2025-10-01 04:38:27.633293', 'step': 15442, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:27.686532', 'step': 15442, 'epoch': 3} {'type': 'loss', 'content': 0.0994148924946785, 'timestamp': '2025-10-01 04:38:27.689148', 'step': 15443, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:27.742394', 'step': 15443, 'epoch': 3} {'type': 'loss', 'content': 0.019369017332792282, 'timestamp': '2025-10-01 04:38:27.748171', 'step': 15444, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:27.802814', 'step': 15444, 'epoch': 3} {'type': 'loss', 'content': 0.06283940374851227, 'timestamp': '2025-10-01 04:38:27.805621', 'step': 15445, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:27.859210', 'step': 15445, 'epoch': 3} {'type': 'loss', 'content': 0.15781135857105255, 'timestamp': '2025-10-01 04:38:27.861453', 'step': 15446, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:27.915193', 'step': 15446, 'epoch': 3} {'type': 'loss', 'content': 0.11506529152393341, 'timestamp': '2025-10-01 04:38:27.917447', 'step': 15447, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:38:27.970750', 'step': 15447, 'epoch': 3} {'type': 'loss', 'content': 0.142081618309021, 'timestamp': '2025-10-01 04:38:27.976614', 'step': 15448, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:28.029794', 'step': 15448, 'epoch': 3} {'type': 'loss', 'content': 0.08785118162631989, 'timestamp': '2025-10-01 04:38:28.032245', 'step': 15449, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:28.086705', 'step': 15449, 'epoch': 3} {'type': 'loss', 'content': 0.11575073003768921, 'timestamp': '2025-10-01 04:38:28.089145', 'step': 15450, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:28.142948', 'step': 15450, 'epoch': 3} {'type': 'loss', 'content': 0.10303600132465363, 'timestamp': '2025-10-01 04:38:28.144776', 'step': 15451, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:28.198184', 'step': 15451, 'epoch': 3} {'type': 'loss', 'content': 0.04688229784369469, 'timestamp': '2025-10-01 04:38:28.203930', 'step': 15452, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:38:28.256697', 'step': 15452, 'epoch': 3} {'type': 'loss', 'content': 0.11204314976930618, 'timestamp': '2025-10-01 04:38:28.258840', 'step': 15453, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:38:28.312242', 'step': 15453, 'epoch': 3} {'type': 'loss', 'content': 0.09007670730352402, 'timestamp': '2025-10-01 04:38:28.314650', 'step': 15454, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:28.368411', 'step': 15454, 'epoch': 3} {'type': 'loss', 'content': 0.15112470090389252, 'timestamp': '2025-10-01 04:38:28.370564', 'step': 15455, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:28.424745', 'step': 15455, 'epoch': 3} {'type': 'loss', 'content': 0.15114617347717285, 'timestamp': '2025-10-01 04:38:28.432375', 'step': 15456, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:28.486318', 'step': 15456, 'epoch': 3} {'type': 'loss', 'content': 0.06643123924732208, 'timestamp': '2025-10-01 04:38:28.489099', 'step': 15457, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:28.543699', 'step': 15457, 'epoch': 3} {'type': 'loss', 'content': 0.061141036450862885, 'timestamp': '2025-10-01 04:38:28.546402', 'step': 15458, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:28.600688', 'step': 15458, 'epoch': 3} {'type': 'loss', 'content': 0.17399777472019196, 'timestamp': '2025-10-01 04:38:28.602947', 'step': 15459, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:28.657433', 'step': 15459, 'epoch': 3} {'type': 'loss', 'content': 0.12277093529701233, 'timestamp': '2025-10-01 04:38:28.663801', 'step': 15460, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:28.717604', 'step': 15460, 'epoch': 3} {'type': 'loss', 'content': 0.07493213564157486, 'timestamp': '2025-10-01 04:38:28.719778', 'step': 15461, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:28.774830', 'step': 15461, 'epoch': 3} {'type': 'loss', 'content': 0.11083486676216125, 'timestamp': '2025-10-01 04:38:28.776920', 'step': 15462, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:28.831618', 'step': 15462, 'epoch': 3} {'type': 'loss', 'content': 0.07244141399860382, 'timestamp': '2025-10-01 04:38:28.834075', 'step': 15463, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:28.888076', 'step': 15463, 'epoch': 3} {'type': 'loss', 'content': 0.055916234850883484, 'timestamp': '2025-10-01 04:38:28.893997', 'step': 15464, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:28.947260', 'step': 15464, 'epoch': 3} {'type': 'loss', 'content': 0.1024392619729042, 'timestamp': '2025-10-01 04:38:28.949402', 'step': 15465, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:29.002770', 'step': 15465, 'epoch': 3} {'type': 'loss', 'content': 0.10361596941947937, 'timestamp': '2025-10-01 04:38:29.004899', 'step': 15466, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:29.058674', 'step': 15466, 'epoch': 3} {'type': 'loss', 'content': 0.09007830917835236, 'timestamp': '2025-10-01 04:38:29.061094', 'step': 15467, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:29.119113', 'step': 15467, 'epoch': 3} {'type': 'loss', 'content': 0.16403624415397644, 'timestamp': '2025-10-01 04:38:29.125252', 'step': 15468, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:29.178421', 'step': 15468, 'epoch': 3} {'type': 'loss', 'content': 0.11398390680551529, 'timestamp': '2025-10-01 04:38:29.180523', 'step': 15469, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:29.233830', 'step': 15469, 'epoch': 3} {'type': 'loss', 'content': 0.07865233719348907, 'timestamp': '2025-10-01 04:38:29.236170', 'step': 15470, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:29.289306', 'step': 15470, 'epoch': 3} {'type': 'loss', 'content': 0.08049587905406952, 'timestamp': '2025-10-01 04:38:29.291534', 'step': 15471, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:29.344775', 'step': 15471, 'epoch': 3} {'type': 'loss', 'content': 0.16462616622447968, 'timestamp': '2025-10-01 04:38:29.350573', 'step': 15472, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:29.403375', 'step': 15472, 'epoch': 3} {'type': 'loss', 'content': 0.09953667223453522, 'timestamp': '2025-10-01 04:38:29.405477', 'step': 15473, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:29.459197', 'step': 15473, 'epoch': 3} {'type': 'loss', 'content': 0.11164991557598114, 'timestamp': '2025-10-01 04:38:29.461523', 'step': 15474, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:29.515580', 'step': 15474, 'epoch': 3} {'type': 'loss', 'content': 0.09201689809560776, 'timestamp': '2025-10-01 04:38:29.517944', 'step': 15475, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:29.571421', 'step': 15475, 'epoch': 3} {'type': 'loss', 'content': 0.08329730480909348, 'timestamp': '2025-10-01 04:38:29.577226', 'step': 15476, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:29.630505', 'step': 15476, 'epoch': 3} {'type': 'loss', 'content': 0.05334413796663284, 'timestamp': '2025-10-01 04:38:29.632684', 'step': 15477, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:29.686797', 'step': 15477, 'epoch': 3} {'type': 'loss', 'content': 0.06148991733789444, 'timestamp': '2025-10-01 04:38:29.689069', 'step': 15478, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:29.744211', 'step': 15478, 'epoch': 3} {'type': 'loss', 'content': 0.10890451818704605, 'timestamp': '2025-10-01 04:38:29.748103', 'step': 15479, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:29.802732', 'step': 15479, 'epoch': 3} {'type': 'loss', 'content': 0.06746158003807068, 'timestamp': '2025-10-01 04:38:29.808855', 'step': 15480, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:29.863385', 'step': 15480, 'epoch': 3} {'type': 'loss', 'content': 0.15846741199493408, 'timestamp': '2025-10-01 04:38:29.866023', 'step': 15481, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:29.919781', 'step': 15481, 'epoch': 3} {'type': 'loss', 'content': 0.05215882882475853, 'timestamp': '2025-10-01 04:38:29.921938', 'step': 15482, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:29.976506', 'step': 15482, 'epoch': 3} {'type': 'loss', 'content': 0.09782084822654724, 'timestamp': '2025-10-01 04:38:29.978816', 'step': 15483, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:30.032692', 'step': 15483, 'epoch': 3} {'type': 'loss', 'content': 0.19731104373931885, 'timestamp': '2025-10-01 04:38:30.038667', 'step': 15484, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:38:30.092393', 'step': 15484, 'epoch': 3} {'type': 'loss', 'content': 0.15396659076213837, 'timestamp': '2025-10-01 04:38:30.094588', 'step': 15485, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:30.149198', 'step': 15485, 'epoch': 3} {'type': 'loss', 'content': 0.09578975290060043, 'timestamp': '2025-10-01 04:38:30.151434', 'step': 15486, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:30.207147', 'step': 15486, 'epoch': 3} {'type': 'loss', 'content': 0.12975861132144928, 'timestamp': '2025-10-01 04:38:30.219137', 'step': 15487, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:30.273712', 'step': 15487, 'epoch': 3} {'type': 'loss', 'content': 0.09051506221294403, 'timestamp': '2025-10-01 04:38:30.280546', 'step': 15488, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:30.335565', 'step': 15488, 'epoch': 3} {'type': 'loss', 'content': 0.10216651856899261, 'timestamp': '2025-10-01 04:38:30.337966', 'step': 15489, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:30.391830', 'step': 15489, 'epoch': 3} {'type': 'loss', 'content': 0.1149900034070015, 'timestamp': '2025-10-01 04:38:30.394176', 'step': 15490, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:30.448543', 'step': 15490, 'epoch': 3} {'type': 'loss', 'content': 0.10228952020406723, 'timestamp': '2025-10-01 04:38:30.450769', 'step': 15491, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:30.504331', 'step': 15491, 'epoch': 3} {'type': 'loss', 'content': 0.04896175488829613, 'timestamp': '2025-10-01 04:38:30.512716', 'step': 15492, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:30.565859', 'step': 15492, 'epoch': 3} {'type': 'loss', 'content': 0.059259265661239624, 'timestamp': '2025-10-01 04:38:30.569631', 'step': 15493, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:30.624739', 'step': 15493, 'epoch': 3} {'type': 'loss', 'content': 0.23122942447662354, 'timestamp': '2025-10-01 04:38:30.626900', 'step': 15494, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:30.688718', 'step': 15494, 'epoch': 3} {'type': 'loss', 'content': 0.03856133669614792, 'timestamp': '2025-10-01 04:38:30.690983', 'step': 15495, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:30.744860', 'step': 15495, 'epoch': 3} {'type': 'loss', 'content': 0.0724840983748436, 'timestamp': '2025-10-01 04:38:30.751262', 'step': 15496, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:30.804811', 'step': 15496, 'epoch': 3} {'type': 'loss', 'content': 0.036340124905109406, 'timestamp': '2025-10-01 04:38:30.807018', 'step': 15497, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:30.860556', 'step': 15497, 'epoch': 3} {'type': 'loss', 'content': 0.15584808588027954, 'timestamp': '2025-10-01 04:38:30.862873', 'step': 15498, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:30.916826', 'step': 15498, 'epoch': 3} {'type': 'loss', 'content': 0.14387837052345276, 'timestamp': '2025-10-01 04:38:30.919399', 'step': 15499, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:30.973447', 'step': 15499, 'epoch': 3} {'type': 'loss', 'content': 0.13697192072868347, 'timestamp': '2025-10-01 04:38:30.979860', 'step': 15500, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 15500', 'timestamp': '2025-10-01 04:38:31.523959', 'step': 15500, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:31.581870', 'step': 15500, 'epoch': 3} {'type': 'loss', 'content': 0.07076683640480042, 'timestamp': '2025-10-01 04:38:31.584782', 'step': 15501, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:31.640106', 'step': 15501, 'epoch': 3} {'type': 'loss', 'content': 0.07706043869256973, 'timestamp': '2025-10-01 04:38:31.643180', 'step': 15502, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:31.698314', 'step': 15502, 'epoch': 3} {'type': 'loss', 'content': 0.059966884553432465, 'timestamp': '2025-10-01 04:38:31.701386', 'step': 15503, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:31.759929', 'step': 15503, 'epoch': 3} {'type': 'loss', 'content': 0.15010526776313782, 'timestamp': '2025-10-01 04:38:31.766605', 'step': 15504, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:31.820285', 'step': 15504, 'epoch': 3} {'type': 'loss', 'content': 0.13706143200397491, 'timestamp': '2025-10-01 04:38:31.822649', 'step': 15505, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-01 04:38:31.877336', 'step': 15505, 'epoch': 3} {'type': 'loss', 'content': 0.06827609241008759, 'timestamp': '2025-10-01 04:38:31.880321', 'step': 15506, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:31.935951', 'step': 15506, 'epoch': 3} {'type': 'loss', 'content': 0.11086592823266983, 'timestamp': '2025-10-01 04:38:31.938501', 'step': 15507, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:31.993362', 'step': 15507, 'epoch': 3} {'type': 'loss', 'content': 0.1060825064778328, 'timestamp': '2025-10-01 04:38:31.999733', 'step': 15508, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:32.054098', 'step': 15508, 'epoch': 3} {'type': 'loss', 'content': 0.16781416535377502, 'timestamp': '2025-10-01 04:38:32.056877', 'step': 15509, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:32.110972', 'step': 15509, 'epoch': 3} {'type': 'loss', 'content': 0.11664461344480515, 'timestamp': '2025-10-01 04:38:32.113879', 'step': 15510, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:38:32.169748', 'step': 15510, 'epoch': 3} {'type': 'loss', 'content': 0.1239745169878006, 'timestamp': '2025-10-01 04:38:32.172445', 'step': 15511, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-01 04:38:32.226917', 'step': 15511, 'epoch': 3} {'type': 'loss', 'content': 0.1313267946243286, 'timestamp': '2025-10-01 04:38:32.232694', 'step': 15512, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:32.286214', 'step': 15512, 'epoch': 3} {'type': 'loss', 'content': 0.15640684962272644, 'timestamp': '2025-10-01 04:38:32.288405', 'step': 15513, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:32.346044', 'step': 15513, 'epoch': 3} {'type': 'loss', 'content': 0.07524056732654572, 'timestamp': '2025-10-01 04:38:32.348693', 'step': 15514, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:32.403655', 'step': 15514, 'epoch': 3} {'type': 'loss', 'content': 0.10396487265825272, 'timestamp': '2025-10-01 04:38:32.406305', 'step': 15515, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:32.461582', 'step': 15515, 'epoch': 3} {'type': 'loss', 'content': 0.18706773221492767, 'timestamp': '2025-10-01 04:38:32.467950', 'step': 15516, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:32.521314', 'step': 15516, 'epoch': 3} {'type': 'loss', 'content': 0.10157809406518936, 'timestamp': '2025-10-01 04:38:32.523707', 'step': 15517, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:32.577408', 'step': 15517, 'epoch': 3} {'type': 'loss', 'content': 0.09760396927595139, 'timestamp': '2025-10-01 04:38:32.579735', 'step': 15518, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:32.633885', 'step': 15518, 'epoch': 3} {'type': 'loss', 'content': 0.10721748322248459, 'timestamp': '2025-10-01 04:38:32.636417', 'step': 15519, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:38:32.694973', 'step': 15519, 'epoch': 3} {'type': 'loss', 'content': 0.13058121502399445, 'timestamp': '2025-10-01 04:38:32.700852', 'step': 15520, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:32.754634', 'step': 15520, 'epoch': 3} {'type': 'loss', 'content': 0.05375993996858597, 'timestamp': '2025-10-01 04:38:32.756933', 'step': 15521, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:32.810418', 'step': 15521, 'epoch': 3} {'type': 'loss', 'content': 0.21780864894390106, 'timestamp': '2025-10-01 04:38:32.814084', 'step': 15522, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:32.867912', 'step': 15522, 'epoch': 3} {'type': 'loss', 'content': 0.11585468053817749, 'timestamp': '2025-10-01 04:38:32.870107', 'step': 15523, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:32.923498', 'step': 15523, 'epoch': 3} {'type': 'loss', 'content': 0.11934029310941696, 'timestamp': '2025-10-01 04:38:32.929266', 'step': 15524, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:32.982651', 'step': 15524, 'epoch': 3} {'type': 'loss', 'content': 0.11577625572681427, 'timestamp': '2025-10-01 04:38:32.984605', 'step': 15525, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:33.037815', 'step': 15525, 'epoch': 3} {'type': 'loss', 'content': 0.06270243227481842, 'timestamp': '2025-10-01 04:38:33.040075', 'step': 15526, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:33.093969', 'step': 15526, 'epoch': 3} {'type': 'loss', 'content': 0.06368878483772278, 'timestamp': '2025-10-01 04:38:33.096793', 'step': 15527, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:33.152305', 'step': 15527, 'epoch': 3} {'type': 'loss', 'content': 0.0976119264960289, 'timestamp': '2025-10-01 04:38:33.158146', 'step': 15528, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:33.210715', 'step': 15528, 'epoch': 3} {'type': 'loss', 'content': 0.03912869840860367, 'timestamp': '2025-10-01 04:38:33.213015', 'step': 15529, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:33.266460', 'step': 15529, 'epoch': 3} {'type': 'loss', 'content': 0.13264866173267365, 'timestamp': '2025-10-01 04:38:33.277464', 'step': 15530, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:33.330785', 'step': 15530, 'epoch': 3} {'type': 'loss', 'content': 0.11055803298950195, 'timestamp': '2025-10-01 04:38:33.333791', 'step': 15531, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:33.386957', 'step': 15531, 'epoch': 3} {'type': 'loss', 'content': 0.12732930481433868, 'timestamp': '2025-10-01 04:38:33.392960', 'step': 15532, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:33.445464', 'step': 15532, 'epoch': 3} {'type': 'loss', 'content': 0.09209538251161575, 'timestamp': '2025-10-01 04:38:33.449451', 'step': 15533, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:33.502632', 'step': 15533, 'epoch': 3} {'type': 'loss', 'content': 0.04984800145030022, 'timestamp': '2025-10-01 04:38:33.504772', 'step': 15534, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:33.559017', 'step': 15534, 'epoch': 3} {'type': 'loss', 'content': 0.15555956959724426, 'timestamp': '2025-10-01 04:38:33.561500', 'step': 15535, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:33.614426', 'step': 15535, 'epoch': 3} {'type': 'loss', 'content': 0.09420444071292877, 'timestamp': '2025-10-01 04:38:33.620316', 'step': 15536, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:33.673344', 'step': 15536, 'epoch': 3} {'type': 'loss', 'content': 0.14451825618743896, 'timestamp': '2025-10-01 04:38:33.675766', 'step': 15537, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:33.729274', 'step': 15537, 'epoch': 3} {'type': 'loss', 'content': 0.11399415880441666, 'timestamp': '2025-10-01 04:38:33.731796', 'step': 15538, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:33.788251', 'step': 15538, 'epoch': 3} {'type': 'loss', 'content': 0.150553360581398, 'timestamp': '2025-10-01 04:38:33.790640', 'step': 15539, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:38:33.844615', 'step': 15539, 'epoch': 3} {'type': 'loss', 'content': 0.1433374136686325, 'timestamp': '2025-10-01 04:38:33.850493', 'step': 15540, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:33.906761', 'step': 15540, 'epoch': 3} {'type': 'loss', 'content': 0.1158040389418602, 'timestamp': '2025-10-01 04:38:33.908897', 'step': 15541, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:33.962144', 'step': 15541, 'epoch': 3} {'type': 'loss', 'content': 0.13277514278888702, 'timestamp': '2025-10-01 04:38:33.964525', 'step': 15542, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:34.030201', 'step': 15542, 'epoch': 3} {'type': 'loss', 'content': 0.14713473618030548, 'timestamp': '2025-10-01 04:38:34.033100', 'step': 15543, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:34.087290', 'step': 15543, 'epoch': 3} {'type': 'loss', 'content': 0.09426647424697876, 'timestamp': '2025-10-01 04:38:34.093735', 'step': 15544, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:34.146992', 'step': 15544, 'epoch': 3} {'type': 'loss', 'content': 0.0653565376996994, 'timestamp': '2025-10-01 04:38:34.149454', 'step': 15545, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:34.204002', 'step': 15545, 'epoch': 3} {'type': 'loss', 'content': 0.15374594926834106, 'timestamp': '2025-10-01 04:38:34.206449', 'step': 15546, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:34.259815', 'step': 15546, 'epoch': 3} {'type': 'loss', 'content': 0.21874158084392548, 'timestamp': '2025-10-01 04:38:34.262291', 'step': 15547, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:38:34.315674', 'step': 15547, 'epoch': 3} {'type': 'loss', 'content': 0.09583409130573273, 'timestamp': '2025-10-01 04:38:34.321596', 'step': 15548, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:34.374267', 'step': 15548, 'epoch': 3} {'type': 'loss', 'content': 0.15418855845928192, 'timestamp': '2025-10-01 04:38:34.376414', 'step': 15549, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:34.439335', 'step': 15549, 'epoch': 3} {'type': 'loss', 'content': 0.045955732464790344, 'timestamp': '2025-10-01 04:38:34.441586', 'step': 15550, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:34.494957', 'step': 15550, 'epoch': 3} {'type': 'loss', 'content': 0.10221939533948898, 'timestamp': '2025-10-01 04:38:34.498055', 'step': 15551, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:34.553172', 'step': 15551, 'epoch': 3} {'type': 'loss', 'content': 0.08082400262355804, 'timestamp': '2025-10-01 04:38:34.559319', 'step': 15552, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:34.620686', 'step': 15552, 'epoch': 3} {'type': 'loss', 'content': 0.11913114786148071, 'timestamp': '2025-10-01 04:38:34.623271', 'step': 15553, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:34.690867', 'step': 15553, 'epoch': 3} {'type': 'loss', 'content': 0.15655702352523804, 'timestamp': '2025-10-01 04:38:34.693110', 'step': 15554, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:34.756399', 'step': 15554, 'epoch': 3} {'type': 'loss', 'content': 0.09444443136453629, 'timestamp': '2025-10-01 04:38:34.758589', 'step': 15555, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:34.812099', 'step': 15555, 'epoch': 3} {'type': 'loss', 'content': 0.09595094621181488, 'timestamp': '2025-10-01 04:38:34.818001', 'step': 15556, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:34.876520', 'step': 15556, 'epoch': 3} {'type': 'loss', 'content': 0.07070938497781754, 'timestamp': '2025-10-01 04:38:34.879165', 'step': 15557, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:34.933098', 'step': 15557, 'epoch': 3} {'type': 'loss', 'content': 0.11911868304014206, 'timestamp': '2025-10-01 04:38:34.935398', 'step': 15558, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:34.989437', 'step': 15558, 'epoch': 3} {'type': 'loss', 'content': 0.12420696765184402, 'timestamp': '2025-10-01 04:38:34.992357', 'step': 15559, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:35.045692', 'step': 15559, 'epoch': 3} {'type': 'loss', 'content': 0.09129278361797333, 'timestamp': '2025-10-01 04:38:35.051682', 'step': 15560, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:35.107229', 'step': 15560, 'epoch': 3} {'type': 'loss', 'content': 0.06203869357705116, 'timestamp': '2025-10-01 04:38:35.118913', 'step': 15561, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:35.172818', 'step': 15561, 'epoch': 3} {'type': 'loss', 'content': 0.04607614129781723, 'timestamp': '2025-10-01 04:38:35.175017', 'step': 15562, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:35.228329', 'step': 15562, 'epoch': 3} {'type': 'loss', 'content': 0.125425785779953, 'timestamp': '2025-10-01 04:38:35.230534', 'step': 15563, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:35.284705', 'step': 15563, 'epoch': 3} {'type': 'loss', 'content': 0.0977059155702591, 'timestamp': '2025-10-01 04:38:35.290776', 'step': 15564, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:35.343275', 'step': 15564, 'epoch': 3} {'type': 'loss', 'content': 0.09103849530220032, 'timestamp': '2025-10-01 04:38:35.345986', 'step': 15565, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:35.400495', 'step': 15565, 'epoch': 3} {'type': 'loss', 'content': 0.07972373068332672, 'timestamp': '2025-10-01 04:38:35.403675', 'step': 15566, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:35.456943', 'step': 15566, 'epoch': 3} {'type': 'loss', 'content': 0.15948118269443512, 'timestamp': '2025-10-01 04:38:35.459150', 'step': 15567, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:35.512129', 'step': 15567, 'epoch': 3} {'type': 'loss', 'content': 0.12885220348834991, 'timestamp': '2025-10-01 04:38:35.518351', 'step': 15568, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:35.572456', 'step': 15568, 'epoch': 3} {'type': 'loss', 'content': 0.13492843508720398, 'timestamp': '2025-10-01 04:38:35.575359', 'step': 15569, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:35.629508', 'step': 15569, 'epoch': 3} {'type': 'loss', 'content': 0.09607528895139694, 'timestamp': '2025-10-01 04:38:35.631730', 'step': 15570, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:38:35.685593', 'step': 15570, 'epoch': 3} {'type': 'loss', 'content': 0.171456441283226, 'timestamp': '2025-10-01 04:38:35.687924', 'step': 15571, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:35.741655', 'step': 15571, 'epoch': 3} {'type': 'loss', 'content': 0.03660959377884865, 'timestamp': '2025-10-01 04:38:35.747570', 'step': 15572, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:35.800998', 'step': 15572, 'epoch': 3} {'type': 'loss', 'content': 0.12431354820728302, 'timestamp': '2025-10-01 04:38:35.803280', 'step': 15573, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:35.856411', 'step': 15573, 'epoch': 3} {'type': 'loss', 'content': 0.10999169200658798, 'timestamp': '2025-10-01 04:38:35.858864', 'step': 15574, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:35.921297', 'step': 15574, 'epoch': 3} {'type': 'loss', 'content': 0.1664431095123291, 'timestamp': '2025-10-01 04:38:35.923653', 'step': 15575, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:35.977110', 'step': 15575, 'epoch': 3} {'type': 'loss', 'content': 0.05757893994450569, 'timestamp': '2025-10-01 04:38:35.982867', 'step': 15576, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:36.035540', 'step': 15576, 'epoch': 3} {'type': 'loss', 'content': 0.08792639523744583, 'timestamp': '2025-10-01 04:38:36.037797', 'step': 15577, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:36.091204', 'step': 15577, 'epoch': 3} {'type': 'loss', 'content': 0.07772764563560486, 'timestamp': '2025-10-01 04:38:36.093422', 'step': 15578, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:36.146816', 'step': 15578, 'epoch': 3} {'type': 'loss', 'content': 0.07004902511835098, 'timestamp': '2025-10-01 04:38:36.149085', 'step': 15579, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:36.209496', 'step': 15579, 'epoch': 3} {'type': 'loss', 'content': 0.09123807400465012, 'timestamp': '2025-10-01 04:38:36.215216', 'step': 15580, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:36.269388', 'step': 15580, 'epoch': 3} {'type': 'loss', 'content': 0.17058928310871124, 'timestamp': '2025-10-01 04:38:36.271538', 'step': 15581, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:36.324550', 'step': 15581, 'epoch': 3} {'type': 'loss', 'content': 0.10965781658887863, 'timestamp': '2025-10-01 04:38:36.327814', 'step': 15582, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:36.381034', 'step': 15582, 'epoch': 3} {'type': 'loss', 'content': 0.1987885981798172, 'timestamp': '2025-10-01 04:38:36.383354', 'step': 15583, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:36.436415', 'step': 15583, 'epoch': 3} {'type': 'loss', 'content': 0.10658591240644455, 'timestamp': '2025-10-01 04:38:36.442166', 'step': 15584, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:36.494739', 'step': 15584, 'epoch': 3} {'type': 'loss', 'content': 0.0836481973528862, 'timestamp': '2025-10-01 04:38:36.496897', 'step': 15585, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:36.549937', 'step': 15585, 'epoch': 3} {'type': 'loss', 'content': 0.08729959279298782, 'timestamp': '2025-10-01 04:38:36.552261', 'step': 15586, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:36.605462', 'step': 15586, 'epoch': 3} {'type': 'loss', 'content': 0.19762742519378662, 'timestamp': '2025-10-01 04:38:36.607752', 'step': 15587, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:36.661324', 'step': 15587, 'epoch': 3} {'type': 'loss', 'content': 0.13053864240646362, 'timestamp': '2025-10-01 04:38:36.667444', 'step': 15588, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:36.719874', 'step': 15588, 'epoch': 3} {'type': 'loss', 'content': 0.23104043304920197, 'timestamp': '2025-10-01 04:38:36.722690', 'step': 15589, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:38:36.776092', 'step': 15589, 'epoch': 3} {'type': 'loss', 'content': 0.08626995980739594, 'timestamp': '2025-10-01 04:38:36.778310', 'step': 15590, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:36.831857', 'step': 15590, 'epoch': 3} {'type': 'loss', 'content': 0.080867700278759, 'timestamp': '2025-10-01 04:38:36.834122', 'step': 15591, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:36.887236', 'step': 15591, 'epoch': 3} {'type': 'loss', 'content': 0.05201170593500137, 'timestamp': '2025-10-01 04:38:36.893037', 'step': 15592, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:36.951689', 'step': 15592, 'epoch': 3} {'type': 'loss', 'content': 0.1242658868432045, 'timestamp': '2025-10-01 04:38:36.953940', 'step': 15593, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:37.007057', 'step': 15593, 'epoch': 3} {'type': 'loss', 'content': 0.09396962821483612, 'timestamp': '2025-10-01 04:38:37.009221', 'step': 15594, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:37.063113', 'step': 15594, 'epoch': 3} {'type': 'loss', 'content': 0.12416651099920273, 'timestamp': '2025-10-01 04:38:37.065705', 'step': 15595, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:38:37.119041', 'step': 15595, 'epoch': 3} {'type': 'loss', 'content': 0.054340068250894547, 'timestamp': '2025-10-01 04:38:37.124783', 'step': 15596, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:37.177667', 'step': 15596, 'epoch': 3} {'type': 'loss', 'content': 0.08970190584659576, 'timestamp': '2025-10-01 04:38:37.180224', 'step': 15597, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:37.233359', 'step': 15597, 'epoch': 3} {'type': 'loss', 'content': 0.08476988226175308, 'timestamp': '2025-10-01 04:38:37.235696', 'step': 15598, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:37.289224', 'step': 15598, 'epoch': 3} {'type': 'loss', 'content': 0.11970581114292145, 'timestamp': '2025-10-01 04:38:37.291878', 'step': 15599, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:37.345215', 'step': 15599, 'epoch': 3} {'type': 'loss', 'content': 0.07575352489948273, 'timestamp': '2025-10-01 04:38:37.351029', 'step': 15600, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:37.404181', 'step': 15600, 'epoch': 3} {'type': 'loss', 'content': 0.08828972280025482, 'timestamp': '2025-10-01 04:38:37.406455', 'step': 15601, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:37.459684', 'step': 15601, 'epoch': 3} {'type': 'loss', 'content': 0.12261014431715012, 'timestamp': '2025-10-01 04:38:37.462170', 'step': 15602, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:37.515903', 'step': 15602, 'epoch': 3} {'type': 'loss', 'content': 0.12531806528568268, 'timestamp': '2025-10-01 04:38:37.518353', 'step': 15603, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:37.582627', 'step': 15603, 'epoch': 3} {'type': 'loss', 'content': 0.05999626964330673, 'timestamp': '2025-10-01 04:38:37.588967', 'step': 15604, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:37.642457', 'step': 15604, 'epoch': 3} {'type': 'loss', 'content': 0.14186537265777588, 'timestamp': '2025-10-01 04:38:37.644663', 'step': 15605, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:37.698594', 'step': 15605, 'epoch': 3} {'type': 'loss', 'content': 0.11744901537895203, 'timestamp': '2025-10-01 04:38:37.700830', 'step': 15606, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:37.754332', 'step': 15606, 'epoch': 3} {'type': 'loss', 'content': 0.026536280289292336, 'timestamp': '2025-10-01 04:38:37.756603', 'step': 15607, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:37.809702', 'step': 15607, 'epoch': 3} {'type': 'loss', 'content': 0.09391362965106964, 'timestamp': '2025-10-01 04:38:37.815413', 'step': 15608, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:37.868382', 'step': 15608, 'epoch': 3} {'type': 'loss', 'content': 0.13169148564338684, 'timestamp': '2025-10-01 04:38:37.870557', 'step': 15609, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:37.932714', 'step': 15609, 'epoch': 3} {'type': 'loss', 'content': 0.20302581787109375, 'timestamp': '2025-10-01 04:38:37.934911', 'step': 15610, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:37.987959', 'step': 15610, 'epoch': 3} {'type': 'loss', 'content': 0.0847996398806572, 'timestamp': '2025-10-01 04:38:37.990801', 'step': 15611, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:38.043698', 'step': 15611, 'epoch': 3} {'type': 'loss', 'content': 0.11824877560138702, 'timestamp': '2025-10-01 04:38:38.049470', 'step': 15612, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:38.102010', 'step': 15612, 'epoch': 3} {'type': 'loss', 'content': 0.11825087666511536, 'timestamp': '2025-10-01 04:38:38.104167', 'step': 15613, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:38.167223', 'step': 15613, 'epoch': 3} {'type': 'loss', 'content': 0.20185282826423645, 'timestamp': '2025-10-01 04:38:38.169988', 'step': 15614, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:38.222741', 'step': 15614, 'epoch': 3} {'type': 'loss', 'content': 0.1523532122373581, 'timestamp': '2025-10-01 04:38:38.224965', 'step': 15615, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:38.277721', 'step': 15615, 'epoch': 3} {'type': 'loss', 'content': 0.09628099948167801, 'timestamp': '2025-10-01 04:38:38.283289', 'step': 15616, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:38.335678', 'step': 15616, 'epoch': 3} {'type': 'loss', 'content': 0.10044188797473907, 'timestamp': '2025-10-01 04:38:38.346547', 'step': 15617, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:38.399977', 'step': 15617, 'epoch': 3} {'type': 'loss', 'content': 0.10630983859300613, 'timestamp': '2025-10-01 04:38:38.402850', 'step': 15618, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:38.456127', 'step': 15618, 'epoch': 3} {'type': 'loss', 'content': 0.10416588187217712, 'timestamp': '2025-10-01 04:38:38.458347', 'step': 15619, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:38.511736', 'step': 15619, 'epoch': 3} {'type': 'loss', 'content': 0.05361783504486084, 'timestamp': '2025-10-01 04:38:38.517499', 'step': 15620, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:38.570362', 'step': 15620, 'epoch': 3} {'type': 'loss', 'content': 0.07634071260690689, 'timestamp': '2025-10-01 04:38:38.572791', 'step': 15621, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:38.626272', 'step': 15621, 'epoch': 3} {'type': 'loss', 'content': 0.03707648441195488, 'timestamp': '2025-10-01 04:38:38.628567', 'step': 15622, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:38.683339', 'step': 15622, 'epoch': 3} {'type': 'loss', 'content': 0.10735389590263367, 'timestamp': '2025-10-01 04:38:38.685603', 'step': 15623, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:38.738371', 'step': 15623, 'epoch': 3} {'type': 'loss', 'content': 0.1877487748861313, 'timestamp': '2025-10-01 04:38:38.744087', 'step': 15624, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:38.796259', 'step': 15624, 'epoch': 3} {'type': 'loss', 'content': 0.10879099369049072, 'timestamp': '2025-10-01 04:38:38.798895', 'step': 15625, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:38.852830', 'step': 15625, 'epoch': 3} {'type': 'loss', 'content': 0.1302488148212433, 'timestamp': '2025-10-01 04:38:38.855129', 'step': 15626, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:38.908183', 'step': 15626, 'epoch': 3} {'type': 'loss', 'content': 0.17948393523693085, 'timestamp': '2025-10-01 04:38:38.911342', 'step': 15627, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:38.964560', 'step': 15627, 'epoch': 3} {'type': 'loss', 'content': 0.08417096734046936, 'timestamp': '2025-10-01 04:38:38.971490', 'step': 15628, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:39.024313', 'step': 15628, 'epoch': 3} {'type': 'loss', 'content': 0.07138536125421524, 'timestamp': '2025-10-01 04:38:39.041174', 'step': 15629, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:39.095653', 'step': 15629, 'epoch': 3} {'type': 'loss', 'content': 0.09338782727718353, 'timestamp': '2025-10-01 04:38:39.097832', 'step': 15630, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:39.150901', 'step': 15630, 'epoch': 3} {'type': 'loss', 'content': 0.10845284909009933, 'timestamp': '2025-10-01 04:38:39.153593', 'step': 15631, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:39.214590', 'step': 15631, 'epoch': 3} {'type': 'loss', 'content': 0.09030962735414505, 'timestamp': '2025-10-01 04:38:39.221369', 'step': 15632, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:39.275134', 'step': 15632, 'epoch': 3} {'type': 'loss', 'content': 0.08545832335948944, 'timestamp': '2025-10-01 04:38:39.277708', 'step': 15633, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:39.331194', 'step': 15633, 'epoch': 3} {'type': 'loss', 'content': 0.11659961193799973, 'timestamp': '2025-10-01 04:38:39.334449', 'step': 15634, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:39.388976', 'step': 15634, 'epoch': 3} {'type': 'loss', 'content': 0.06773784756660461, 'timestamp': '2025-10-01 04:38:39.392042', 'step': 15635, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:39.445784', 'step': 15635, 'epoch': 3} {'type': 'loss', 'content': 0.07266377657651901, 'timestamp': '2025-10-01 04:38:39.451513', 'step': 15636, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:38:39.505083', 'step': 15636, 'epoch': 3} {'type': 'loss', 'content': 0.05069270730018616, 'timestamp': '2025-10-01 04:38:39.507823', 'step': 15637, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:39.562086', 'step': 15637, 'epoch': 3} {'type': 'loss', 'content': 0.12423460930585861, 'timestamp': '2025-10-01 04:38:39.564585', 'step': 15638, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:39.618590', 'step': 15638, 'epoch': 3} {'type': 'loss', 'content': 0.15921278297901154, 'timestamp': '2025-10-01 04:38:39.621786', 'step': 15639, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:39.675680', 'step': 15639, 'epoch': 3} {'type': 'loss', 'content': 0.04111124947667122, 'timestamp': '2025-10-01 04:38:39.682853', 'step': 15640, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:39.735924', 'step': 15640, 'epoch': 3} {'type': 'loss', 'content': 0.0540730357170105, 'timestamp': '2025-10-01 04:38:39.738787', 'step': 15641, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:39.795642', 'step': 15641, 'epoch': 3} {'type': 'loss', 'content': 0.13742734491825104, 'timestamp': '2025-10-01 04:38:39.798816', 'step': 15642, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:39.854233', 'step': 15642, 'epoch': 3} {'type': 'loss', 'content': 0.11409492790699005, 'timestamp': '2025-10-01 04:38:39.857111', 'step': 15643, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:38:39.910541', 'step': 15643, 'epoch': 3} {'type': 'loss', 'content': 0.0352165661752224, 'timestamp': '2025-10-01 04:38:39.917006', 'step': 15644, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:39.970993', 'step': 15644, 'epoch': 3} {'type': 'loss', 'content': 0.11634210497140884, 'timestamp': '2025-10-01 04:38:39.974953', 'step': 15645, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:40.029953', 'step': 15645, 'epoch': 3} {'type': 'loss', 'content': 0.10287052392959595, 'timestamp': '2025-10-01 04:38:40.032471', 'step': 15646, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:40.086116', 'step': 15646, 'epoch': 3} {'type': 'loss', 'content': 0.14211229979991913, 'timestamp': '2025-10-01 04:38:40.088789', 'step': 15647, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:40.144667', 'step': 15647, 'epoch': 3} {'type': 'loss', 'content': 0.04443920776247978, 'timestamp': '2025-10-01 04:38:40.150625', 'step': 15648, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:40.203392', 'step': 15648, 'epoch': 3} {'type': 'loss', 'content': 0.10556349903345108, 'timestamp': '2025-10-01 04:38:40.206014', 'step': 15649, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:40.264163', 'step': 15649, 'epoch': 3} {'type': 'loss', 'content': 0.1281445324420929, 'timestamp': '2025-10-01 04:38:40.269506', 'step': 15650, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:40.330311', 'step': 15650, 'epoch': 3} {'type': 'loss', 'content': 0.12194034457206726, 'timestamp': '2025-10-01 04:38:40.333989', 'step': 15651, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:40.392449', 'step': 15651, 'epoch': 3} {'type': 'loss', 'content': 0.07370680570602417, 'timestamp': '2025-10-01 04:38:40.398640', 'step': 15652, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:40.454595', 'step': 15652, 'epoch': 3} {'type': 'loss', 'content': 0.13792388141155243, 'timestamp': '2025-10-01 04:38:40.457079', 'step': 15653, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:40.510420', 'step': 15653, 'epoch': 3} {'type': 'loss', 'content': 0.13513121008872986, 'timestamp': '2025-10-01 04:38:40.512768', 'step': 15654, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:40.567495', 'step': 15654, 'epoch': 3} {'type': 'loss', 'content': 0.1865762621164322, 'timestamp': '2025-10-01 04:38:40.570274', 'step': 15655, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:38:40.623778', 'step': 15655, 'epoch': 3} {'type': 'loss', 'content': 0.042138781398534775, 'timestamp': '2025-10-01 04:38:40.630099', 'step': 15656, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:38:40.683716', 'step': 15656, 'epoch': 3} {'type': 'loss', 'content': 0.09670107811689377, 'timestamp': '2025-10-01 04:38:40.685957', 'step': 15657, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:40.738904', 'step': 15657, 'epoch': 3} {'type': 'loss', 'content': 0.08500386774539948, 'timestamp': '2025-10-01 04:38:40.741609', 'step': 15658, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:40.795230', 'step': 15658, 'epoch': 3} {'type': 'loss', 'content': 0.10508318245410919, 'timestamp': '2025-10-01 04:38:40.799934', 'step': 15659, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:40.854946', 'step': 15659, 'epoch': 3} {'type': 'loss', 'content': 0.04022959619760513, 'timestamp': '2025-10-01 04:38:40.860935', 'step': 15660, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:40.913711', 'step': 15660, 'epoch': 3} {'type': 'loss', 'content': 0.1009359359741211, 'timestamp': '2025-10-01 04:38:40.916208', 'step': 15661, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:40.969137', 'step': 15661, 'epoch': 3} {'type': 'loss', 'content': 0.0797257199883461, 'timestamp': '2025-10-01 04:38:40.976561', 'step': 15662, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:41.031426', 'step': 15662, 'epoch': 3} {'type': 'loss', 'content': 0.18039846420288086, 'timestamp': '2025-10-01 04:38:41.034197', 'step': 15663, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:41.087462', 'step': 15663, 'epoch': 3} {'type': 'loss', 'content': 0.14354243874549866, 'timestamp': '2025-10-01 04:38:41.093477', 'step': 15664, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:41.146361', 'step': 15664, 'epoch': 3} {'type': 'loss', 'content': 0.027446819469332695, 'timestamp': '2025-10-01 04:38:41.148543', 'step': 15665, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:41.202980', 'step': 15665, 'epoch': 3} {'type': 'loss', 'content': 0.09064464271068573, 'timestamp': '2025-10-01 04:38:41.205283', 'step': 15666, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:41.258583', 'step': 15666, 'epoch': 3} {'type': 'loss', 'content': 0.1453845500946045, 'timestamp': '2025-10-01 04:38:41.260785', 'step': 15667, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:41.314364', 'step': 15667, 'epoch': 3} {'type': 'loss', 'content': 0.11638813465833664, 'timestamp': '2025-10-01 04:38:41.320264', 'step': 15668, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:38:41.373885', 'step': 15668, 'epoch': 3} {'type': 'loss', 'content': 0.12698228657245636, 'timestamp': '2025-10-01 04:38:41.376145', 'step': 15669, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:41.429161', 'step': 15669, 'epoch': 3} {'type': 'loss', 'content': 0.10147088766098022, 'timestamp': '2025-10-01 04:38:41.431409', 'step': 15670, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:41.484685', 'step': 15670, 'epoch': 3} {'type': 'loss', 'content': 0.14718246459960938, 'timestamp': '2025-10-01 04:38:41.487558', 'step': 15671, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:41.540699', 'step': 15671, 'epoch': 3} {'type': 'loss', 'content': 0.13633385300636292, 'timestamp': '2025-10-01 04:38:41.547146', 'step': 15672, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:41.607430', 'step': 15672, 'epoch': 3} {'type': 'loss', 'content': 0.12208064645528793, 'timestamp': '2025-10-01 04:38:41.609889', 'step': 15673, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:41.663955', 'step': 15673, 'epoch': 3} {'type': 'loss', 'content': 0.16146738827228546, 'timestamp': '2025-10-01 04:38:41.666255', 'step': 15674, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:41.719530', 'step': 15674, 'epoch': 3} {'type': 'loss', 'content': 0.07549545913934708, 'timestamp': '2025-10-01 04:38:41.721899', 'step': 15675, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:41.775030', 'step': 15675, 'epoch': 3} {'type': 'loss', 'content': 0.1135597974061966, 'timestamp': '2025-10-01 04:38:41.781295', 'step': 15676, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:41.833696', 'step': 15676, 'epoch': 3} {'type': 'loss', 'content': 0.1197778731584549, 'timestamp': '2025-10-01 04:38:41.842809', 'step': 15677, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:41.897173', 'step': 15677, 'epoch': 3} {'type': 'loss', 'content': 0.27017679810523987, 'timestamp': '2025-10-01 04:38:41.901331', 'step': 15678, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:41.956895', 'step': 15678, 'epoch': 3} {'type': 'loss', 'content': 0.03438958153128624, 'timestamp': '2025-10-01 04:38:41.959047', 'step': 15679, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:42.012366', 'step': 15679, 'epoch': 3} {'type': 'loss', 'content': 0.04469813406467438, 'timestamp': '2025-10-01 04:38:42.018237', 'step': 15680, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:42.070863', 'step': 15680, 'epoch': 3} {'type': 'loss', 'content': 0.11504030972719193, 'timestamp': '2025-10-01 04:38:42.073054', 'step': 15681, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:42.128996', 'step': 15681, 'epoch': 3} {'type': 'loss', 'content': 0.09435904771089554, 'timestamp': '2025-10-01 04:38:42.131100', 'step': 15682, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:42.186589', 'step': 15682, 'epoch': 3} {'type': 'loss', 'content': 0.1549861878156662, 'timestamp': '2025-10-01 04:38:42.192660', 'step': 15683, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:42.245890', 'step': 15683, 'epoch': 3} {'type': 'loss', 'content': 0.052818797528743744, 'timestamp': '2025-10-01 04:38:42.251898', 'step': 15684, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:42.305015', 'step': 15684, 'epoch': 3} {'type': 'loss', 'content': 0.21587322652339935, 'timestamp': '2025-10-01 04:38:42.307147', 'step': 15685, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:42.360115', 'step': 15685, 'epoch': 3} {'type': 'loss', 'content': 0.14144080877304077, 'timestamp': '2025-10-01 04:38:42.362438', 'step': 15686, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:38:42.415415', 'step': 15686, 'epoch': 3} {'type': 'loss', 'content': 0.04177871346473694, 'timestamp': '2025-10-01 04:38:42.417769', 'step': 15687, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:42.482595', 'step': 15687, 'epoch': 3} {'type': 'loss', 'content': 0.12487494945526123, 'timestamp': '2025-10-01 04:38:42.490254', 'step': 15688, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:42.543276', 'step': 15688, 'epoch': 3} {'type': 'loss', 'content': 0.11140130460262299, 'timestamp': '2025-10-01 04:38:42.545694', 'step': 15689, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:42.599355', 'step': 15689, 'epoch': 3} {'type': 'loss', 'content': 0.09541936218738556, 'timestamp': '2025-10-01 04:38:42.601710', 'step': 15690, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:42.658280', 'step': 15690, 'epoch': 3} {'type': 'loss', 'content': 0.08484441041946411, 'timestamp': '2025-10-01 04:38:42.660639', 'step': 15691, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:38:42.713727', 'step': 15691, 'epoch': 3} {'type': 'loss', 'content': 0.053415484726428986, 'timestamp': '2025-10-01 04:38:42.719484', 'step': 15692, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:42.772858', 'step': 15692, 'epoch': 3} {'type': 'loss', 'content': 0.086433544754982, 'timestamp': '2025-10-01 04:38:42.775163', 'step': 15693, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:42.828335', 'step': 15693, 'epoch': 3} {'type': 'loss', 'content': 0.10044824331998825, 'timestamp': '2025-10-01 04:38:42.830579', 'step': 15694, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:42.883943', 'step': 15694, 'epoch': 3} {'type': 'loss', 'content': 0.04510337859392166, 'timestamp': '2025-10-01 04:38:42.886164', 'step': 15695, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:42.939254', 'step': 15695, 'epoch': 3} {'type': 'loss', 'content': 0.07248767465353012, 'timestamp': '2025-10-01 04:38:42.945083', 'step': 15696, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:42.997737', 'step': 15696, 'epoch': 3} {'type': 'loss', 'content': 0.14666403830051422, 'timestamp': '2025-10-01 04:38:43.002897', 'step': 15697, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:38:43.056242', 'step': 15697, 'epoch': 3} {'type': 'loss', 'content': 0.04072266072034836, 'timestamp': '2025-10-01 04:38:43.058945', 'step': 15698, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:43.111948', 'step': 15698, 'epoch': 3} {'type': 'loss', 'content': 0.1546873152256012, 'timestamp': '2025-10-01 04:38:43.114246', 'step': 15699, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:43.168045', 'step': 15699, 'epoch': 3} {'type': 'loss', 'content': 0.1297275424003601, 'timestamp': '2025-10-01 04:38:43.174257', 'step': 15700, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:43.226496', 'step': 15700, 'epoch': 3} {'type': 'loss', 'content': 0.05806966871023178, 'timestamp': '2025-10-01 04:38:43.228687', 'step': 15701, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:43.282182', 'step': 15701, 'epoch': 3} {'type': 'loss', 'content': 0.0920230895280838, 'timestamp': '2025-10-01 04:38:43.284507', 'step': 15702, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:43.338124', 'step': 15702, 'epoch': 3} {'type': 'loss', 'content': 0.07767537981271744, 'timestamp': '2025-10-01 04:38:43.340596', 'step': 15703, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:38:43.394855', 'step': 15703, 'epoch': 3} {'type': 'loss', 'content': 0.09208033233880997, 'timestamp': '2025-10-01 04:38:43.400780', 'step': 15704, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:43.453399', 'step': 15704, 'epoch': 3} {'type': 'loss', 'content': 0.06728712469339371, 'timestamp': '2025-10-01 04:38:43.455628', 'step': 15705, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:43.510061', 'step': 15705, 'epoch': 3} {'type': 'loss', 'content': 0.19364230334758759, 'timestamp': '2025-10-01 04:38:43.512339', 'step': 15706, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:43.568662', 'step': 15706, 'epoch': 3} {'type': 'loss', 'content': 0.15158654749393463, 'timestamp': '2025-10-01 04:38:43.571600', 'step': 15707, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:43.626103', 'step': 15707, 'epoch': 3} {'type': 'loss', 'content': 0.060487549751996994, 'timestamp': '2025-10-01 04:38:43.631934', 'step': 15708, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:43.685718', 'step': 15708, 'epoch': 3} {'type': 'loss', 'content': 0.0577104352414608, 'timestamp': '2025-10-01 04:38:43.688343', 'step': 15709, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:43.741962', 'step': 15709, 'epoch': 3} {'type': 'loss', 'content': 0.16515135765075684, 'timestamp': '2025-10-01 04:38:43.744124', 'step': 15710, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:43.798669', 'step': 15710, 'epoch': 3} {'type': 'loss', 'content': 0.12743891775608063, 'timestamp': '2025-10-01 04:38:43.801003', 'step': 15711, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:43.855899', 'step': 15711, 'epoch': 3} {'type': 'loss', 'content': 0.09645022451877594, 'timestamp': '2025-10-01 04:38:43.862365', 'step': 15712, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:43.915874', 'step': 15712, 'epoch': 3} {'type': 'loss', 'content': 0.11372503638267517, 'timestamp': '2025-10-01 04:38:43.918059', 'step': 15713, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:43.972796', 'step': 15713, 'epoch': 3} {'type': 'loss', 'content': 0.12780435383319855, 'timestamp': '2025-10-01 04:38:43.975058', 'step': 15714, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:44.029883', 'step': 15714, 'epoch': 3} {'type': 'loss', 'content': 0.08101941645145416, 'timestamp': '2025-10-01 04:38:44.032093', 'step': 15715, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:44.097087', 'step': 15715, 'epoch': 3} {'type': 'loss', 'content': 0.10736516118049622, 'timestamp': '2025-10-01 04:38:44.103662', 'step': 15716, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:44.157649', 'step': 15716, 'epoch': 3} {'type': 'loss', 'content': 0.07267927378416061, 'timestamp': '2025-10-01 04:38:44.166304', 'step': 15717, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:44.220445', 'step': 15717, 'epoch': 3} {'type': 'loss', 'content': 0.05649534985423088, 'timestamp': '2025-10-01 04:38:44.222801', 'step': 15718, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:44.276929', 'step': 15718, 'epoch': 3} {'type': 'loss', 'content': 0.10267321765422821, 'timestamp': '2025-10-01 04:38:44.278930', 'step': 15719, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:44.332441', 'step': 15719, 'epoch': 3} {'type': 'loss', 'content': 0.05851634591817856, 'timestamp': '2025-10-01 04:38:44.338673', 'step': 15720, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:44.397216', 'step': 15720, 'epoch': 3} {'type': 'loss', 'content': 0.11608999222517014, 'timestamp': '2025-10-01 04:38:44.399451', 'step': 15721, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:44.452840', 'step': 15721, 'epoch': 3} {'type': 'loss', 'content': 0.09032361209392548, 'timestamp': '2025-10-01 04:38:44.455353', 'step': 15722, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:44.509692', 'step': 15722, 'epoch': 3} {'type': 'loss', 'content': 0.16689197719097137, 'timestamp': '2025-10-01 04:38:44.512082', 'step': 15723, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:44.565344', 'step': 15723, 'epoch': 3} {'type': 'loss', 'content': 0.0638270378112793, 'timestamp': '2025-10-01 04:38:44.573066', 'step': 15724, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:44.626308', 'step': 15724, 'epoch': 3} {'type': 'loss', 'content': 0.017604917287826538, 'timestamp': '2025-10-01 04:38:44.628454', 'step': 15725, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:44.684101', 'step': 15725, 'epoch': 3} {'type': 'loss', 'content': 0.0930597111582756, 'timestamp': '2025-10-01 04:38:44.686265', 'step': 15726, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:44.739507', 'step': 15726, 'epoch': 3} {'type': 'loss', 'content': 0.0928959995508194, 'timestamp': '2025-10-01 04:38:44.741829', 'step': 15727, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:44.795708', 'step': 15727, 'epoch': 3} {'type': 'loss', 'content': 0.15121980011463165, 'timestamp': '2025-10-01 04:38:44.801790', 'step': 15728, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:44.855613', 'step': 15728, 'epoch': 3} {'type': 'loss', 'content': 0.13981439173221588, 'timestamp': '2025-10-01 04:38:44.858109', 'step': 15729, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:44.923061', 'step': 15729, 'epoch': 3} {'type': 'loss', 'content': 0.1094907894730568, 'timestamp': '2025-10-01 04:38:44.925334', 'step': 15730, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:44.979556', 'step': 15730, 'epoch': 3} {'type': 'loss', 'content': 0.057680971920490265, 'timestamp': '2025-10-01 04:38:44.981951', 'step': 15731, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:45.036395', 'step': 15731, 'epoch': 3} {'type': 'loss', 'content': 0.1761917918920517, 'timestamp': '2025-10-01 04:38:45.043253', 'step': 15732, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:45.097042', 'step': 15732, 'epoch': 3} {'type': 'loss', 'content': 0.04752316698431969, 'timestamp': '2025-10-01 04:38:45.099505', 'step': 15733, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:45.153663', 'step': 15733, 'epoch': 3} {'type': 'loss', 'content': 0.07935438305139542, 'timestamp': '2025-10-01 04:38:45.168873', 'step': 15734, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:38:45.223880', 'step': 15734, 'epoch': 3} {'type': 'loss', 'content': 0.07291717827320099, 'timestamp': '2025-10-01 04:38:45.226110', 'step': 15735, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:45.280197', 'step': 15735, 'epoch': 3} {'type': 'loss', 'content': 0.07515587657690048, 'timestamp': '2025-10-01 04:38:45.286664', 'step': 15736, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:45.343567', 'step': 15736, 'epoch': 3} {'type': 'loss', 'content': 0.14016345143318176, 'timestamp': '2025-10-01 04:38:45.346164', 'step': 15737, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:45.407778', 'step': 15737, 'epoch': 3} {'type': 'loss', 'content': 0.09786500781774521, 'timestamp': '2025-10-01 04:38:45.410090', 'step': 15738, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:45.467553', 'step': 15738, 'epoch': 3} {'type': 'loss', 'content': 0.14512601494789124, 'timestamp': '2025-10-01 04:38:45.469962', 'step': 15739, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:45.540422', 'step': 15739, 'epoch': 3} {'type': 'loss', 'content': 0.11121433973312378, 'timestamp': '2025-10-01 04:38:45.547330', 'step': 15740, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:45.605398', 'step': 15740, 'epoch': 3} {'type': 'loss', 'content': 0.0991002768278122, 'timestamp': '2025-10-01 04:38:45.607676', 'step': 15741, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:45.681735', 'step': 15741, 'epoch': 3} {'type': 'loss', 'content': 0.07368186861276627, 'timestamp': '2025-10-01 04:38:45.684015', 'step': 15742, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:45.742684', 'step': 15742, 'epoch': 3} {'type': 'loss', 'content': 0.08740886300802231, 'timestamp': '2025-10-01 04:38:45.745200', 'step': 15743, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:45.804839', 'step': 15743, 'epoch': 3} {'type': 'loss', 'content': 0.08527424931526184, 'timestamp': '2025-10-01 04:38:45.814330', 'step': 15744, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:45.873548', 'step': 15744, 'epoch': 3} {'type': 'loss', 'content': 0.1382724940776825, 'timestamp': '2025-10-01 04:38:45.877617', 'step': 15745, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:45.935200', 'step': 15745, 'epoch': 3} {'type': 'loss', 'content': 0.07769186794757843, 'timestamp': '2025-10-01 04:38:45.937955', 'step': 15746, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:46.009700', 'step': 15746, 'epoch': 3} {'type': 'loss', 'content': 0.21579407155513763, 'timestamp': '2025-10-01 04:38:46.012144', 'step': 15747, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:46.071197', 'step': 15747, 'epoch': 3} {'type': 'loss', 'content': 0.1356808990240097, 'timestamp': '2025-10-01 04:38:46.078270', 'step': 15748, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:46.135299', 'step': 15748, 'epoch': 3} {'type': 'loss', 'content': 0.09073027223348618, 'timestamp': '2025-10-01 04:38:46.137966', 'step': 15749, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:46.192133', 'step': 15749, 'epoch': 3} {'type': 'loss', 'content': 0.06582760065793991, 'timestamp': '2025-10-01 04:38:46.194326', 'step': 15750, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:46.248783', 'step': 15750, 'epoch': 3} {'type': 'loss', 'content': 0.14268049597740173, 'timestamp': '2025-10-01 04:38:46.251446', 'step': 15751, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:46.306481', 'step': 15751, 'epoch': 3} {'type': 'loss', 'content': 0.10063544660806656, 'timestamp': '2025-10-01 04:38:46.313010', 'step': 15752, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:46.367709', 'step': 15752, 'epoch': 3} {'type': 'loss', 'content': 0.06499294191598892, 'timestamp': '2025-10-01 04:38:46.369884', 'step': 15753, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:46.427989', 'step': 15753, 'epoch': 3} {'type': 'loss', 'content': 0.08295082300901413, 'timestamp': '2025-10-01 04:38:46.435927', 'step': 15754, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:46.500192', 'step': 15754, 'epoch': 3} {'type': 'loss', 'content': 0.06766611337661743, 'timestamp': '2025-10-01 04:38:46.502541', 'step': 15755, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:38:46.557838', 'step': 15755, 'epoch': 3} {'type': 'loss', 'content': 0.04635479673743248, 'timestamp': '2025-10-01 04:38:46.564719', 'step': 15756, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:46.623130', 'step': 15756, 'epoch': 3} {'type': 'loss', 'content': 0.1078919768333435, 'timestamp': '2025-10-01 04:38:46.625478', 'step': 15757, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:46.680193', 'step': 15757, 'epoch': 3} {'type': 'loss', 'content': 0.08642199635505676, 'timestamp': '2025-10-01 04:38:46.682444', 'step': 15758, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:46.737864', 'step': 15758, 'epoch': 3} {'type': 'loss', 'content': 0.1862078458070755, 'timestamp': '2025-10-01 04:38:46.740079', 'step': 15759, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:46.794874', 'step': 15759, 'epoch': 3} {'type': 'loss', 'content': 0.13236640393733978, 'timestamp': '2025-10-01 04:38:46.801412', 'step': 15760, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:46.855036', 'step': 15760, 'epoch': 3} {'type': 'loss', 'content': 0.13798660039901733, 'timestamp': '2025-10-01 04:38:46.857364', 'step': 15761, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:46.911358', 'step': 15761, 'epoch': 3} {'type': 'loss', 'content': 0.03954188898205757, 'timestamp': '2025-10-01 04:38:46.913514', 'step': 15762, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:46.975636', 'step': 15762, 'epoch': 3} {'type': 'loss', 'content': 0.1388646811246872, 'timestamp': '2025-10-01 04:38:46.977923', 'step': 15763, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:47.039903', 'step': 15763, 'epoch': 3} {'type': 'loss', 'content': 0.10098827630281448, 'timestamp': '2025-10-01 04:38:47.046294', 'step': 15764, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:47.106288', 'step': 15764, 'epoch': 3} {'type': 'loss', 'content': 0.12293851375579834, 'timestamp': '2025-10-01 04:38:47.110606', 'step': 15765, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:47.175624', 'step': 15765, 'epoch': 3} {'type': 'loss', 'content': 0.12073680758476257, 'timestamp': '2025-10-01 04:38:47.178015', 'step': 15766, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:47.244867', 'step': 15766, 'epoch': 3} {'type': 'loss', 'content': 0.05504349619150162, 'timestamp': '2025-10-01 04:38:47.247130', 'step': 15767, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:38:47.302053', 'step': 15767, 'epoch': 3} {'type': 'loss', 'content': 0.09929603338241577, 'timestamp': '2025-10-01 04:38:47.308558', 'step': 15768, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:47.361840', 'step': 15768, 'epoch': 3} {'type': 'loss', 'content': 0.10198107361793518, 'timestamp': '2025-10-01 04:38:47.364895', 'step': 15769, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:47.418803', 'step': 15769, 'epoch': 3} {'type': 'loss', 'content': 0.10355236381292343, 'timestamp': '2025-10-01 04:38:47.421335', 'step': 15770, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:47.479019', 'step': 15770, 'epoch': 3} {'type': 'loss', 'content': 0.1164129376411438, 'timestamp': '2025-10-01 04:38:47.485344', 'step': 15771, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:47.539100', 'step': 15771, 'epoch': 3} {'type': 'loss', 'content': 0.0845792219042778, 'timestamp': '2025-10-01 04:38:47.546340', 'step': 15772, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:47.598863', 'step': 15772, 'epoch': 3} {'type': 'loss', 'content': 0.013672770000994205, 'timestamp': '2025-10-01 04:38:47.601728', 'step': 15773, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:47.655867', 'step': 15773, 'epoch': 3} {'type': 'loss', 'content': 0.1163366511464119, 'timestamp': '2025-10-01 04:38:47.657971', 'step': 15774, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:47.711954', 'step': 15774, 'epoch': 3} {'type': 'loss', 'content': 0.08583152294158936, 'timestamp': '2025-10-01 04:38:47.714379', 'step': 15775, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:47.768188', 'step': 15775, 'epoch': 3} {'type': 'loss', 'content': 0.09583529829978943, 'timestamp': '2025-10-01 04:38:47.774136', 'step': 15776, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:47.827563', 'step': 15776, 'epoch': 3} {'type': 'loss', 'content': 0.0969005599617958, 'timestamp': '2025-10-01 04:38:47.829640', 'step': 15777, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:47.889356', 'step': 15777, 'epoch': 3} {'type': 'loss', 'content': 0.14247022569179535, 'timestamp': '2025-10-01 04:38:47.895934', 'step': 15778, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:47.955194', 'step': 15778, 'epoch': 3} {'type': 'loss', 'content': 0.04098261892795563, 'timestamp': '2025-10-01 04:38:47.957309', 'step': 15779, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:38:48.010804', 'step': 15779, 'epoch': 3} {'type': 'loss', 'content': 0.09151025861501694, 'timestamp': '2025-10-01 04:38:48.016503', 'step': 15780, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:48.069832', 'step': 15780, 'epoch': 3} {'type': 'loss', 'content': 0.048279035836458206, 'timestamp': '2025-10-01 04:38:48.072854', 'step': 15781, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:48.126060', 'step': 15781, 'epoch': 3} {'type': 'loss', 'content': 0.06350646167993546, 'timestamp': '2025-10-01 04:38:48.128253', 'step': 15782, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:48.186341', 'step': 15782, 'epoch': 3} {'type': 'loss', 'content': 0.2305268794298172, 'timestamp': '2025-10-01 04:38:48.188441', 'step': 15783, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:38:48.241924', 'step': 15783, 'epoch': 3} {'type': 'loss', 'content': 0.09093213081359863, 'timestamp': '2025-10-01 04:38:48.247980', 'step': 15784, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:48.308445', 'step': 15784, 'epoch': 3} {'type': 'loss', 'content': 0.14889536798000336, 'timestamp': '2025-10-01 04:38:48.311038', 'step': 15785, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:48.365276', 'step': 15785, 'epoch': 3} {'type': 'loss', 'content': 0.1277276575565338, 'timestamp': '2025-10-01 04:38:48.367990', 'step': 15786, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:48.423419', 'step': 15786, 'epoch': 3} {'type': 'loss', 'content': 0.08309202641248703, 'timestamp': '2025-10-01 04:38:48.425830', 'step': 15787, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:48.482355', 'step': 15787, 'epoch': 3} {'type': 'loss', 'content': 0.1603471040725708, 'timestamp': '2025-10-01 04:38:48.488272', 'step': 15788, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:48.543609', 'step': 15788, 'epoch': 3} {'type': 'loss', 'content': 0.12785406410694122, 'timestamp': '2025-10-01 04:38:48.545948', 'step': 15789, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:48.599721', 'step': 15789, 'epoch': 3} {'type': 'loss', 'content': 0.14338728785514832, 'timestamp': '2025-10-01 04:38:48.603068', 'step': 15790, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:48.657507', 'step': 15790, 'epoch': 3} {'type': 'loss', 'content': 0.04854443296790123, 'timestamp': '2025-10-01 04:38:48.659992', 'step': 15791, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:48.714103', 'step': 15791, 'epoch': 3} {'type': 'loss', 'content': 0.11383770406246185, 'timestamp': '2025-10-01 04:38:48.720306', 'step': 15792, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:48.785767', 'step': 15792, 'epoch': 3} {'type': 'loss', 'content': 0.08776381611824036, 'timestamp': '2025-10-01 04:38:48.788259', 'step': 15793, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:48.842286', 'step': 15793, 'epoch': 3} {'type': 'loss', 'content': 0.20304323732852936, 'timestamp': '2025-10-01 04:38:48.844599', 'step': 15794, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:48.900061', 'step': 15794, 'epoch': 3} {'type': 'loss', 'content': 0.0729646384716034, 'timestamp': '2025-10-01 04:38:48.902651', 'step': 15795, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:48.964964', 'step': 15795, 'epoch': 3} {'type': 'loss', 'content': 0.11657200008630753, 'timestamp': '2025-10-01 04:38:48.971173', 'step': 15796, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:49.037374', 'step': 15796, 'epoch': 3} {'type': 'loss', 'content': 0.1148027628660202, 'timestamp': '2025-10-01 04:38:49.039890', 'step': 15797, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:49.093817', 'step': 15797, 'epoch': 3} {'type': 'loss', 'content': 0.128028005361557, 'timestamp': '2025-10-01 04:38:49.096107', 'step': 15798, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:49.150780', 'step': 15798, 'epoch': 3} {'type': 'loss', 'content': 0.08113853633403778, 'timestamp': '2025-10-01 04:38:49.155915', 'step': 15799, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:49.210407', 'step': 15799, 'epoch': 3} {'type': 'loss', 'content': 0.06764522194862366, 'timestamp': '2025-10-01 04:38:49.216329', 'step': 15800, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:49.270527', 'step': 15800, 'epoch': 3} {'type': 'loss', 'content': 0.04682048782706261, 'timestamp': '2025-10-01 04:38:49.273419', 'step': 15801, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:49.327892', 'step': 15801, 'epoch': 3} {'type': 'loss', 'content': 0.14707502722740173, 'timestamp': '2025-10-01 04:38:49.330306', 'step': 15802, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:49.385858', 'step': 15802, 'epoch': 3} {'type': 'loss', 'content': 0.10320016741752625, 'timestamp': '2025-10-01 04:38:49.388177', 'step': 15803, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:49.441477', 'step': 15803, 'epoch': 3} {'type': 'loss', 'content': 0.13421890139579773, 'timestamp': '2025-10-01 04:38:49.448280', 'step': 15804, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:49.502930', 'step': 15804, 'epoch': 3} {'type': 'loss', 'content': 0.09926894307136536, 'timestamp': '2025-10-01 04:38:49.505243', 'step': 15805, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:49.559409', 'step': 15805, 'epoch': 3} {'type': 'loss', 'content': 0.21994474530220032, 'timestamp': '2025-10-01 04:38:49.561911', 'step': 15806, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:38:49.634987', 'step': 15806, 'epoch': 3} {'type': 'loss', 'content': 0.15869829058647156, 'timestamp': '2025-10-01 04:38:49.637380', 'step': 15807, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:49.710439', 'step': 15807, 'epoch': 3} {'type': 'loss', 'content': 0.15571415424346924, 'timestamp': '2025-10-01 04:38:49.716681', 'step': 15808, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:49.770721', 'step': 15808, 'epoch': 3} {'type': 'loss', 'content': 0.24249787628650665, 'timestamp': '2025-10-01 04:38:49.773457', 'step': 15809, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:49.827682', 'step': 15809, 'epoch': 3} {'type': 'loss', 'content': 0.03885595500469208, 'timestamp': '2025-10-01 04:38:49.830294', 'step': 15810, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:49.886429', 'step': 15810, 'epoch': 3} {'type': 'loss', 'content': 0.12588119506835938, 'timestamp': '2025-10-01 04:38:49.888602', 'step': 15811, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:49.942735', 'step': 15811, 'epoch': 3} {'type': 'loss', 'content': 0.06910928338766098, 'timestamp': '2025-10-01 04:38:49.948470', 'step': 15812, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:50.001745', 'step': 15812, 'epoch': 3} {'type': 'loss', 'content': 0.11739052087068558, 'timestamp': '2025-10-01 04:38:50.004134', 'step': 15813, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:50.058679', 'step': 15813, 'epoch': 3} {'type': 'loss', 'content': 0.10229671001434326, 'timestamp': '2025-10-01 04:38:50.064319', 'step': 15814, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:50.118862', 'step': 15814, 'epoch': 3} {'type': 'loss', 'content': 0.07481329143047333, 'timestamp': '2025-10-01 04:38:50.120962', 'step': 15815, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:50.174479', 'step': 15815, 'epoch': 3} {'type': 'loss', 'content': 0.09510042518377304, 'timestamp': '2025-10-01 04:38:50.180251', 'step': 15816, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:38:50.233363', 'step': 15816, 'epoch': 3} {'type': 'loss', 'content': 0.08918063342571259, 'timestamp': '2025-10-01 04:38:50.236539', 'step': 15817, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:50.293496', 'step': 15817, 'epoch': 3} {'type': 'loss', 'content': 0.07498448342084885, 'timestamp': '2025-10-01 04:38:50.295736', 'step': 15818, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:50.349431', 'step': 15818, 'epoch': 3} {'type': 'loss', 'content': 0.1381722092628479, 'timestamp': '2025-10-01 04:38:50.351549', 'step': 15819, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:50.404563', 'step': 15819, 'epoch': 3} {'type': 'loss', 'content': 0.21255889534950256, 'timestamp': '2025-10-01 04:38:50.410327', 'step': 15820, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:50.462630', 'step': 15820, 'epoch': 3} {'type': 'loss', 'content': 0.06865392625331879, 'timestamp': '2025-10-01 04:38:50.464804', 'step': 15821, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:50.518884', 'step': 15821, 'epoch': 3} {'type': 'loss', 'content': 0.09952981770038605, 'timestamp': '2025-10-01 04:38:50.521146', 'step': 15822, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:50.574465', 'step': 15822, 'epoch': 3} {'type': 'loss', 'content': 0.05924556404352188, 'timestamp': '2025-10-01 04:38:50.576700', 'step': 15823, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:50.629943', 'step': 15823, 'epoch': 3} {'type': 'loss', 'content': 0.08480090647935867, 'timestamp': '2025-10-01 04:38:50.635694', 'step': 15824, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:38:50.688595', 'step': 15824, 'epoch': 3} {'type': 'loss', 'content': 0.14343330264091492, 'timestamp': '2025-10-01 04:38:50.699430', 'step': 15825, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:38:50.753495', 'step': 15825, 'epoch': 3} {'type': 'loss', 'content': 0.10959773510694504, 'timestamp': '2025-10-01 04:38:50.757087', 'step': 15826, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:50.811155', 'step': 15826, 'epoch': 3} {'type': 'loss', 'content': 0.10216314345598221, 'timestamp': '2025-10-01 04:38:50.813785', 'step': 15827, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:50.869387', 'step': 15827, 'epoch': 3} {'type': 'loss', 'content': 0.07927265018224716, 'timestamp': '2025-10-01 04:38:50.875197', 'step': 15828, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:38:50.935531', 'step': 15828, 'epoch': 3} {'type': 'loss', 'content': 0.10776326805353165, 'timestamp': '2025-10-01 04:38:50.944546', 'step': 15829, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:51.020079', 'step': 15829, 'epoch': 3} {'type': 'loss', 'content': 0.0927916169166565, 'timestamp': '2025-10-01 04:38:51.022097', 'step': 15830, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:51.083649', 'step': 15830, 'epoch': 3} {'type': 'loss', 'content': 0.15431572496891022, 'timestamp': '2025-10-01 04:38:51.091552', 'step': 15831, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:51.145116', 'step': 15831, 'epoch': 3} {'type': 'loss', 'content': 0.09081694483757019, 'timestamp': '2025-10-01 04:38:51.151047', 'step': 15832, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:51.213688', 'step': 15832, 'epoch': 3} {'type': 'loss', 'content': 0.11631055921316147, 'timestamp': '2025-10-01 04:38:51.216096', 'step': 15833, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:51.279749', 'step': 15833, 'epoch': 3} {'type': 'loss', 'content': 0.06653787940740585, 'timestamp': '2025-10-01 04:38:51.281967', 'step': 15834, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-01 04:38:51.335838', 'step': 15834, 'epoch': 3} {'type': 'loss', 'content': 0.22030994296073914, 'timestamp': '2025-10-01 04:38:51.338393', 'step': 15835, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:51.397933', 'step': 15835, 'epoch': 3} {'type': 'loss', 'content': 0.10578902810811996, 'timestamp': '2025-10-01 04:38:51.404305', 'step': 15836, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:38:51.456996', 'step': 15836, 'epoch': 3} {'type': 'loss', 'content': 0.1836751401424408, 'timestamp': '2025-10-01 04:38:51.459655', 'step': 15837, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:51.513379', 'step': 15837, 'epoch': 3} {'type': 'loss', 'content': 0.11184711754322052, 'timestamp': '2025-10-01 04:38:51.515579', 'step': 15838, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:51.569410', 'step': 15838, 'epoch': 3} {'type': 'loss', 'content': 0.1660466343164444, 'timestamp': '2025-10-01 04:38:51.571636', 'step': 15839, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:51.627124', 'step': 15839, 'epoch': 3} {'type': 'loss', 'content': 0.11381971091032028, 'timestamp': '2025-10-01 04:38:51.632895', 'step': 15840, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:51.689081', 'step': 15840, 'epoch': 3} {'type': 'loss', 'content': 0.16060684621334076, 'timestamp': '2025-10-01 04:38:51.691116', 'step': 15841, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:38:51.752482', 'step': 15841, 'epoch': 3} {'type': 'loss', 'content': 0.10015226155519485, 'timestamp': '2025-10-01 04:38:51.754650', 'step': 15842, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:51.807372', 'step': 15842, 'epoch': 3} {'type': 'loss', 'content': 0.10606944561004639, 'timestamp': '2025-10-01 04:38:51.809449', 'step': 15843, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:38:51.862359', 'step': 15843, 'epoch': 3} {'type': 'loss', 'content': 0.1608501672744751, 'timestamp': '2025-10-01 04:38:51.868017', 'step': 15844, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-01 04:39:05.288893', 'step': 15844, 'epoch': 3} {'type': 'pplx', 'content': 9112.88338085339, 'timestamp': '2025-10-01 04:39:05.300024', 'step': 15844, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:39:05.354420', 'step': 15844, 'epoch': 3} {'type': 'loss', 'content': 0.07285428792238235, 'timestamp': '2025-10-01 04:39:05.356506', 'step': 15845, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:05.421151', 'step': 15845, 'epoch': 3} {'type': 'loss', 'content': 0.18765531480312347, 'timestamp': '2025-10-01 04:39:05.423495', 'step': 15846, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:05.483833', 'step': 15846, 'epoch': 3} {'type': 'loss', 'content': 0.13857439160346985, 'timestamp': '2025-10-01 04:39:05.486190', 'step': 15847, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:05.541057', 'step': 15847, 'epoch': 3} {'type': 'loss', 'content': 0.05856887251138687, 'timestamp': '2025-10-01 04:39:05.547145', 'step': 15848, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:39:05.600408', 'step': 15848, 'epoch': 3} {'type': 'loss', 'content': 0.0886736586689949, 'timestamp': '2025-10-01 04:39:05.602534', 'step': 15849, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:05.656117', 'step': 15849, 'epoch': 3} {'type': 'loss', 'content': 0.062476951628923416, 'timestamp': '2025-10-01 04:39:05.658305', 'step': 15850, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:05.711936', 'step': 15850, 'epoch': 3} {'type': 'loss', 'content': 0.08977145701646805, 'timestamp': '2025-10-01 04:39:05.714234', 'step': 15851, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:05.767564', 'step': 15851, 'epoch': 3} {'type': 'loss', 'content': 0.17430922389030457, 'timestamp': '2025-10-01 04:39:05.773558', 'step': 15852, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:05.826474', 'step': 15852, 'epoch': 3} {'type': 'loss', 'content': 0.06833157688379288, 'timestamp': '2025-10-01 04:39:05.829586', 'step': 15853, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:05.883629', 'step': 15853, 'epoch': 3} {'type': 'loss', 'content': 0.04277433827519417, 'timestamp': '2025-10-01 04:39:05.885954', 'step': 15854, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:05.940469', 'step': 15854, 'epoch': 3} {'type': 'loss', 'content': 0.10476667433977127, 'timestamp': '2025-10-01 04:39:05.943087', 'step': 15855, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:05.996706', 'step': 15855, 'epoch': 3} {'type': 'loss', 'content': 0.07682662457227707, 'timestamp': '2025-10-01 04:39:06.002853', 'step': 15856, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:06.060922', 'step': 15856, 'epoch': 3} {'type': 'loss', 'content': 0.11534067988395691, 'timestamp': '2025-10-01 04:39:06.063019', 'step': 15857, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:06.119133', 'step': 15857, 'epoch': 3} {'type': 'loss', 'content': 0.1157851442694664, 'timestamp': '2025-10-01 04:39:06.126907', 'step': 15858, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:39:06.181942', 'step': 15858, 'epoch': 3} {'type': 'loss', 'content': 0.14632892608642578, 'timestamp': '2025-10-01 04:39:06.184973', 'step': 15859, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:06.246189', 'step': 15859, 'epoch': 3} {'type': 'loss', 'content': 0.09787868708372116, 'timestamp': '2025-10-01 04:39:06.252072', 'step': 15860, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:06.308821', 'step': 15860, 'epoch': 3} {'type': 'loss', 'content': 0.10177656263113022, 'timestamp': '2025-10-01 04:39:06.311063', 'step': 15861, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:06.365537', 'step': 15861, 'epoch': 3} {'type': 'loss', 'content': 0.07033438980579376, 'timestamp': '2025-10-01 04:39:06.367613', 'step': 15862, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:06.424461', 'step': 15862, 'epoch': 3} {'type': 'loss', 'content': 0.12054210156202316, 'timestamp': '2025-10-01 04:39:06.432994', 'step': 15863, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:06.502863', 'step': 15863, 'epoch': 3} {'type': 'loss', 'content': 0.05539526045322418, 'timestamp': '2025-10-01 04:39:06.509033', 'step': 15864, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:06.569542', 'step': 15864, 'epoch': 3} {'type': 'loss', 'content': 0.07460755854845047, 'timestamp': '2025-10-01 04:39:06.571596', 'step': 15865, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:06.627620', 'step': 15865, 'epoch': 3} {'type': 'loss', 'content': 0.06263288855552673, 'timestamp': '2025-10-01 04:39:06.630073', 'step': 15866, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:06.684252', 'step': 15866, 'epoch': 3} {'type': 'loss', 'content': 0.1140991747379303, 'timestamp': '2025-10-01 04:39:06.686922', 'step': 15867, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:06.740182', 'step': 15867, 'epoch': 3} {'type': 'loss', 'content': 0.12004483491182327, 'timestamp': '2025-10-01 04:39:06.745709', 'step': 15868, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:39:06.798946', 'step': 15868, 'epoch': 3} {'type': 'loss', 'content': 0.11268125474452972, 'timestamp': '2025-10-01 04:39:06.800873', 'step': 15869, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:06.854253', 'step': 15869, 'epoch': 3} {'type': 'loss', 'content': 0.1081402525305748, 'timestamp': '2025-10-01 04:39:06.856286', 'step': 15870, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:06.909522', 'step': 15870, 'epoch': 3} {'type': 'loss', 'content': 0.1083037331700325, 'timestamp': '2025-10-01 04:39:06.911821', 'step': 15871, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:06.965005', 'step': 15871, 'epoch': 3} {'type': 'loss', 'content': 0.09292255342006683, 'timestamp': '2025-10-01 04:39:06.970780', 'step': 15872, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:07.026275', 'step': 15872, 'epoch': 3} {'type': 'loss', 'content': 0.10040228068828583, 'timestamp': '2025-10-01 04:39:07.029268', 'step': 15873, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:39:07.086412', 'step': 15873, 'epoch': 3} {'type': 'loss', 'content': 0.03264020383358002, 'timestamp': '2025-10-01 04:39:07.089228', 'step': 15874, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:07.143138', 'step': 15874, 'epoch': 3} {'type': 'loss', 'content': 0.10053903609514236, 'timestamp': '2025-10-01 04:39:07.145891', 'step': 15875, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:07.198959', 'step': 15875, 'epoch': 3} {'type': 'loss', 'content': 0.09451380372047424, 'timestamp': '2025-10-01 04:39:07.204453', 'step': 15876, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:07.258400', 'step': 15876, 'epoch': 3} {'type': 'loss', 'content': 0.11832654476165771, 'timestamp': '2025-10-01 04:39:07.260849', 'step': 15877, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:07.315484', 'step': 15877, 'epoch': 3} {'type': 'loss', 'content': 0.09538529068231583, 'timestamp': '2025-10-01 04:39:07.317684', 'step': 15878, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:07.371913', 'step': 15878, 'epoch': 3} {'type': 'loss', 'content': 0.15188175439834595, 'timestamp': '2025-10-01 04:39:07.374489', 'step': 15879, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:07.441489', 'step': 15879, 'epoch': 3} {'type': 'loss', 'content': 0.0994410365819931, 'timestamp': '2025-10-01 04:39:07.447361', 'step': 15880, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:07.502856', 'step': 15880, 'epoch': 3} {'type': 'loss', 'content': 0.0929347425699234, 'timestamp': '2025-10-01 04:39:07.505057', 'step': 15881, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:07.560319', 'step': 15881, 'epoch': 3} {'type': 'loss', 'content': 0.09429015964269638, 'timestamp': '2025-10-01 04:39:07.568075', 'step': 15882, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:07.621957', 'step': 15882, 'epoch': 3} {'type': 'loss', 'content': 0.1472814977169037, 'timestamp': '2025-10-01 04:39:07.624088', 'step': 15883, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:07.677858', 'step': 15883, 'epoch': 3} {'type': 'loss', 'content': 0.12510547041893005, 'timestamp': '2025-10-01 04:39:07.684078', 'step': 15884, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:39:07.737800', 'step': 15884, 'epoch': 3} {'type': 'loss', 'content': 0.09166016429662704, 'timestamp': '2025-10-01 04:39:07.742035', 'step': 15885, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:07.797800', 'step': 15885, 'epoch': 3} {'type': 'loss', 'content': 0.04859492555260658, 'timestamp': '2025-10-01 04:39:07.800428', 'step': 15886, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:39:07.861979', 'step': 15886, 'epoch': 3} {'type': 'loss', 'content': 0.0695793628692627, 'timestamp': '2025-10-01 04:39:07.874584', 'step': 15887, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:07.929907', 'step': 15887, 'epoch': 3} {'type': 'loss', 'content': 0.10543189942836761, 'timestamp': '2025-10-01 04:39:07.935884', 'step': 15888, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:07.990956', 'step': 15888, 'epoch': 3} {'type': 'loss', 'content': 0.061896272003650665, 'timestamp': '2025-10-01 04:39:07.993067', 'step': 15889, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:08.047536', 'step': 15889, 'epoch': 3} {'type': 'loss', 'content': 0.12407398223876953, 'timestamp': '2025-10-01 04:39:08.050355', 'step': 15890, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:08.104779', 'step': 15890, 'epoch': 3} {'type': 'loss', 'content': 0.09772519022226334, 'timestamp': '2025-10-01 04:39:08.107118', 'step': 15891, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:08.162326', 'step': 15891, 'epoch': 3} {'type': 'loss', 'content': 0.21850846707820892, 'timestamp': '2025-10-01 04:39:08.169184', 'step': 15892, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:08.223615', 'step': 15892, 'epoch': 3} {'type': 'loss', 'content': 0.0794023722410202, 'timestamp': '2025-10-01 04:39:08.232891', 'step': 15893, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:08.287493', 'step': 15893, 'epoch': 3} {'type': 'loss', 'content': 0.06419280916452408, 'timestamp': '2025-10-01 04:39:08.290084', 'step': 15894, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:08.344754', 'step': 15894, 'epoch': 3} {'type': 'loss', 'content': 0.13958598673343658, 'timestamp': '2025-10-01 04:39:08.346941', 'step': 15895, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:39:08.412652', 'step': 15895, 'epoch': 3} {'type': 'loss', 'content': 0.09777455031871796, 'timestamp': '2025-10-01 04:39:08.419218', 'step': 15896, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:08.472538', 'step': 15896, 'epoch': 3} {'type': 'loss', 'content': 0.07959907501935959, 'timestamp': '2025-10-01 04:39:08.474778', 'step': 15897, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:08.529421', 'step': 15897, 'epoch': 3} {'type': 'loss', 'content': 0.1394345760345459, 'timestamp': '2025-10-01 04:39:08.532691', 'step': 15898, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:08.600538', 'step': 15898, 'epoch': 3} {'type': 'loss', 'content': 0.09647113084793091, 'timestamp': '2025-10-01 04:39:08.603036', 'step': 15899, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:08.660118', 'step': 15899, 'epoch': 3} {'type': 'loss', 'content': 0.09836220741271973, 'timestamp': '2025-10-01 04:39:08.666140', 'step': 15900, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:08.726077', 'step': 15900, 'epoch': 3} {'type': 'loss', 'content': 0.08556132763624191, 'timestamp': '2025-10-01 04:39:08.737587', 'step': 15901, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:08.794172', 'step': 15901, 'epoch': 3} {'type': 'loss', 'content': 0.09691771119832993, 'timestamp': '2025-10-01 04:39:08.796232', 'step': 15902, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:08.850920', 'step': 15902, 'epoch': 3} {'type': 'loss', 'content': 0.03444064408540726, 'timestamp': '2025-10-01 04:39:08.854162', 'step': 15903, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:08.908323', 'step': 15903, 'epoch': 3} {'type': 'loss', 'content': 0.12579114735126495, 'timestamp': '2025-10-01 04:39:08.914578', 'step': 15904, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:08.969058', 'step': 15904, 'epoch': 3} {'type': 'loss', 'content': 0.10161565989255905, 'timestamp': '2025-10-01 04:39:08.971959', 'step': 15905, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:09.027167', 'step': 15905, 'epoch': 3} {'type': 'loss', 'content': 0.06070885434746742, 'timestamp': '2025-10-01 04:39:09.029550', 'step': 15906, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:09.084008', 'step': 15906, 'epoch': 3} {'type': 'loss', 'content': 0.07789044827222824, 'timestamp': '2025-10-01 04:39:09.086895', 'step': 15907, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:09.142572', 'step': 15907, 'epoch': 3} {'type': 'loss', 'content': 0.0927039235830307, 'timestamp': '2025-10-01 04:39:09.147988', 'step': 15908, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:09.204534', 'step': 15908, 'epoch': 3} {'type': 'loss', 'content': 0.11785796284675598, 'timestamp': '2025-10-01 04:39:09.206572', 'step': 15909, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:09.267649', 'step': 15909, 'epoch': 3} {'type': 'loss', 'content': 0.08466234803199768, 'timestamp': '2025-10-01 04:39:09.269384', 'step': 15910, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:09.323204', 'step': 15910, 'epoch': 3} {'type': 'loss', 'content': 0.08012890070676804, 'timestamp': '2025-10-01 04:39:09.325432', 'step': 15911, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:09.379658', 'step': 15911, 'epoch': 3} {'type': 'loss', 'content': 0.13836906850337982, 'timestamp': '2025-10-01 04:39:09.385720', 'step': 15912, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:09.439265', 'step': 15912, 'epoch': 3} {'type': 'loss', 'content': 0.09313299506902695, 'timestamp': '2025-10-01 04:39:09.442202', 'step': 15913, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:39:09.499624', 'step': 15913, 'epoch': 3} {'type': 'loss', 'content': 0.10524675995111465, 'timestamp': '2025-10-01 04:39:09.502111', 'step': 15914, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:09.558314', 'step': 15914, 'epoch': 3} {'type': 'loss', 'content': 0.0939989909529686, 'timestamp': '2025-10-01 04:39:09.560164', 'step': 15915, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:39:09.618233', 'step': 15915, 'epoch': 3} {'type': 'loss', 'content': 0.11892995983362198, 'timestamp': '2025-10-01 04:39:09.624303', 'step': 15916, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:09.677336', 'step': 15916, 'epoch': 3} {'type': 'loss', 'content': 0.06780175119638443, 'timestamp': '2025-10-01 04:39:09.679256', 'step': 15917, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:09.732274', 'step': 15917, 'epoch': 3} {'type': 'loss', 'content': 0.13315308094024658, 'timestamp': '2025-10-01 04:39:09.734498', 'step': 15918, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:09.788761', 'step': 15918, 'epoch': 3} {'type': 'loss', 'content': 0.11318753659725189, 'timestamp': '2025-10-01 04:39:09.790933', 'step': 15919, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:09.844224', 'step': 15919, 'epoch': 3} {'type': 'loss', 'content': 0.12519389390945435, 'timestamp': '2025-10-01 04:39:09.850104', 'step': 15920, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:09.903439', 'step': 15920, 'epoch': 3} {'type': 'loss', 'content': 0.07591552287340164, 'timestamp': '2025-10-01 04:39:09.905840', 'step': 15921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:09.959448', 'step': 15921, 'epoch': 3} {'type': 'loss', 'content': 0.11751063168048859, 'timestamp': '2025-10-01 04:39:09.961746', 'step': 15922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:10.017907', 'step': 15922, 'epoch': 3} {'type': 'loss', 'content': 0.18247728049755096, 'timestamp': '2025-10-01 04:39:10.020176', 'step': 15923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:10.083861', 'step': 15923, 'epoch': 3} {'type': 'loss', 'content': 0.0629940778017044, 'timestamp': '2025-10-01 04:39:10.089337', 'step': 15924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:10.142911', 'step': 15924, 'epoch': 3} {'type': 'loss', 'content': 0.09107202291488647, 'timestamp': '2025-10-01 04:39:10.146874', 'step': 15925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:10.200995', 'step': 15925, 'epoch': 3} {'type': 'loss', 'content': 0.10487211495637894, 'timestamp': '2025-10-01 04:39:10.209989', 'step': 15926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:10.263487', 'step': 15926, 'epoch': 3} {'type': 'loss', 'content': 0.10223135352134705, 'timestamp': '2025-10-01 04:39:10.265907', 'step': 15927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:10.320396', 'step': 15927, 'epoch': 3} {'type': 'loss', 'content': 0.0754697248339653, 'timestamp': '2025-10-01 04:39:10.326554', 'step': 15928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:10.385440', 'step': 15928, 'epoch': 3} {'type': 'loss', 'content': 0.10590603947639465, 'timestamp': '2025-10-01 04:39:10.387925', 'step': 15929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:10.441185', 'step': 15929, 'epoch': 3} {'type': 'loss', 'content': 0.05036213621497154, 'timestamp': '2025-10-01 04:39:10.443058', 'step': 15930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:39:10.497251', 'step': 15930, 'epoch': 3} {'type': 'loss', 'content': 0.17033345997333527, 'timestamp': '2025-10-01 04:39:10.500033', 'step': 15931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:10.555189', 'step': 15931, 'epoch': 3} {'type': 'loss', 'content': 0.09689879417419434, 'timestamp': '2025-10-01 04:39:10.561174', 'step': 15932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:39:10.616280', 'step': 15932, 'epoch': 3} {'type': 'loss', 'content': 0.13174933195114136, 'timestamp': '2025-10-01 04:39:10.618648', 'step': 15933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:10.682117', 'step': 15933, 'epoch': 3} {'type': 'loss', 'content': 0.04171961918473244, 'timestamp': '2025-10-01 04:39:10.684178', 'step': 15934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:10.738355', 'step': 15934, 'epoch': 3} {'type': 'loss', 'content': 0.045455705374479294, 'timestamp': '2025-10-01 04:39:10.740665', 'step': 15935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:10.794377', 'step': 15935, 'epoch': 3} {'type': 'loss', 'content': 0.11069919914007187, 'timestamp': '2025-10-01 04:39:10.801094', 'step': 15936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:10.854266', 'step': 15936, 'epoch': 3} {'type': 'loss', 'content': 0.07884106785058975, 'timestamp': '2025-10-01 04:39:10.856452', 'step': 15937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:10.910470', 'step': 15937, 'epoch': 3} {'type': 'loss', 'content': 0.07492046803236008, 'timestamp': '2025-10-01 04:39:10.913031', 'step': 15938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:10.966806', 'step': 15938, 'epoch': 3} {'type': 'loss', 'content': 0.06405376642942429, 'timestamp': '2025-10-01 04:39:10.979664', 'step': 15939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:11.042066', 'step': 15939, 'epoch': 3} {'type': 'loss', 'content': 0.12559349834918976, 'timestamp': '2025-10-01 04:39:11.048048', 'step': 15940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:11.102397', 'step': 15940, 'epoch': 3} {'type': 'loss', 'content': 0.1105189099907875, 'timestamp': '2025-10-01 04:39:11.105382', 'step': 15941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:11.159706', 'step': 15941, 'epoch': 3} {'type': 'loss', 'content': 0.19851917028427124, 'timestamp': '2025-10-01 04:39:11.162005', 'step': 15942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:11.226676', 'step': 15942, 'epoch': 3} {'type': 'loss', 'content': 0.14604924619197845, 'timestamp': '2025-10-01 04:39:11.229994', 'step': 15943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:11.290971', 'step': 15943, 'epoch': 3} {'type': 'loss', 'content': 0.08067121356725693, 'timestamp': '2025-10-01 04:39:11.297168', 'step': 15944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:11.350872', 'step': 15944, 'epoch': 3} {'type': 'loss', 'content': 0.07984155416488647, 'timestamp': '2025-10-01 04:39:11.353300', 'step': 15945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:11.407987', 'step': 15945, 'epoch': 3} {'type': 'loss', 'content': 0.1147361472249031, 'timestamp': '2025-10-01 04:39:11.410454', 'step': 15946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:11.464600', 'step': 15946, 'epoch': 3} {'type': 'loss', 'content': 0.07386253029108047, 'timestamp': '2025-10-01 04:39:11.466947', 'step': 15947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:11.521126', 'step': 15947, 'epoch': 3} {'type': 'loss', 'content': 0.18736112117767334, 'timestamp': '2025-10-01 04:39:11.527263', 'step': 15948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:11.580065', 'step': 15948, 'epoch': 3} {'type': 'loss', 'content': 0.16250312328338623, 'timestamp': '2025-10-01 04:39:11.582266', 'step': 15949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:39:11.643552', 'step': 15949, 'epoch': 3} {'type': 'loss', 'content': 0.04369841888546944, 'timestamp': '2025-10-01 04:39:11.653946', 'step': 15950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:11.708724', 'step': 15950, 'epoch': 3} {'type': 'loss', 'content': 0.23938895761966705, 'timestamp': '2025-10-01 04:39:11.710891', 'step': 15951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:11.769914', 'step': 15951, 'epoch': 3} {'type': 'loss', 'content': 0.08424219489097595, 'timestamp': '2025-10-01 04:39:11.777260', 'step': 15952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:11.835313', 'step': 15952, 'epoch': 3} {'type': 'loss', 'content': 0.18257422745227814, 'timestamp': '2025-10-01 04:39:11.837471', 'step': 15953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:11.891069', 'step': 15953, 'epoch': 3} {'type': 'loss', 'content': 0.09748806804418564, 'timestamp': '2025-10-01 04:39:11.893369', 'step': 15954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:11.952265', 'step': 15954, 'epoch': 3} {'type': 'loss', 'content': 0.1992524117231369, 'timestamp': '2025-10-01 04:39:11.954647', 'step': 15955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:12.011287', 'step': 15955, 'epoch': 3} {'type': 'loss', 'content': 0.05732037127017975, 'timestamp': '2025-10-01 04:39:12.021586', 'step': 15956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:12.083650', 'step': 15956, 'epoch': 3} {'type': 'loss', 'content': 0.11248779296875, 'timestamp': '2025-10-01 04:39:12.085892', 'step': 15957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:12.139212', 'step': 15957, 'epoch': 3} {'type': 'loss', 'content': 0.06169435381889343, 'timestamp': '2025-10-01 04:39:12.141398', 'step': 15958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:12.207875', 'step': 15958, 'epoch': 3} {'type': 'loss', 'content': 0.07835045456886292, 'timestamp': '2025-10-01 04:39:12.211206', 'step': 15959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:39:12.279208', 'step': 15959, 'epoch': 3} {'type': 'loss', 'content': 0.09176280349493027, 'timestamp': '2025-10-01 04:39:12.285063', 'step': 15960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:12.348638', 'step': 15960, 'epoch': 3} {'type': 'loss', 'content': 0.18058359622955322, 'timestamp': '2025-10-01 04:39:12.350712', 'step': 15961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:12.407698', 'step': 15961, 'epoch': 3} {'type': 'loss', 'content': 0.14466439187526703, 'timestamp': '2025-10-01 04:39:12.410069', 'step': 15962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:39:12.464380', 'step': 15962, 'epoch': 3} {'type': 'loss', 'content': 0.07000917941331863, 'timestamp': '2025-10-01 04:39:12.467729', 'step': 15963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:12.528575', 'step': 15963, 'epoch': 3} {'type': 'loss', 'content': 0.18495532870292664, 'timestamp': '2025-10-01 04:39:12.535042', 'step': 15964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:12.591168', 'step': 15964, 'epoch': 3} {'type': 'loss', 'content': 0.12902404367923737, 'timestamp': '2025-10-01 04:39:12.594466', 'step': 15965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:12.660689', 'step': 15965, 'epoch': 3} {'type': 'loss', 'content': 0.03914686664938927, 'timestamp': '2025-10-01 04:39:12.662780', 'step': 15966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:12.716172', 'step': 15966, 'epoch': 3} {'type': 'loss', 'content': 0.16306592524051666, 'timestamp': '2025-10-01 04:39:12.722001', 'step': 15967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:12.781591', 'step': 15967, 'epoch': 3} {'type': 'loss', 'content': 0.11695162951946259, 'timestamp': '2025-10-01 04:39:12.789902', 'step': 15968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:12.850398', 'step': 15968, 'epoch': 3} {'type': 'loss', 'content': 0.0831720232963562, 'timestamp': '2025-10-01 04:39:12.853294', 'step': 15969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:12.909505', 'step': 15969, 'epoch': 3} {'type': 'loss', 'content': 0.05938299745321274, 'timestamp': '2025-10-01 04:39:12.912521', 'step': 15970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:12.973689', 'step': 15970, 'epoch': 3} {'type': 'loss', 'content': 0.07973650097846985, 'timestamp': '2025-10-01 04:39:12.976129', 'step': 15971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:13.030800', 'step': 15971, 'epoch': 3} {'type': 'loss', 'content': 0.07896026223897934, 'timestamp': '2025-10-01 04:39:13.037030', 'step': 15972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:13.090927', 'step': 15972, 'epoch': 3} {'type': 'loss', 'content': 0.09283053129911423, 'timestamp': '2025-10-01 04:39:13.093133', 'step': 15973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:13.150656', 'step': 15973, 'epoch': 3} {'type': 'loss', 'content': 0.08508162200450897, 'timestamp': '2025-10-01 04:39:13.163179', 'step': 15974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:13.216745', 'step': 15974, 'epoch': 3} {'type': 'loss', 'content': 0.0730801448225975, 'timestamp': '2025-10-01 04:39:13.221099', 'step': 15975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:39:13.285255', 'step': 15975, 'epoch': 3} {'type': 'loss', 'content': 0.14597897231578827, 'timestamp': '2025-10-01 04:39:13.292085', 'step': 15976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:13.345620', 'step': 15976, 'epoch': 3} {'type': 'loss', 'content': 0.18347690999507904, 'timestamp': '2025-10-01 04:39:13.348027', 'step': 15977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:13.401695', 'step': 15977, 'epoch': 3} {'type': 'loss', 'content': 0.03413095325231552, 'timestamp': '2025-10-01 04:39:13.403795', 'step': 15978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:13.457593', 'step': 15978, 'epoch': 3} {'type': 'loss', 'content': 0.07830925285816193, 'timestamp': '2025-10-01 04:39:13.459971', 'step': 15979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:13.520290', 'step': 15979, 'epoch': 3} {'type': 'loss', 'content': 0.11229698359966278, 'timestamp': '2025-10-01 04:39:13.526165', 'step': 15980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:13.580656', 'step': 15980, 'epoch': 3} {'type': 'loss', 'content': 0.220506489276886, 'timestamp': '2025-10-01 04:39:13.583002', 'step': 15981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:13.636555', 'step': 15981, 'epoch': 3} {'type': 'loss', 'content': 0.10426419228315353, 'timestamp': '2025-10-01 04:39:13.638676', 'step': 15982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:13.701259', 'step': 15982, 'epoch': 3} {'type': 'loss', 'content': 0.10361690074205399, 'timestamp': '2025-10-01 04:39:13.703468', 'step': 15983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:13.763273', 'step': 15983, 'epoch': 3} {'type': 'loss', 'content': 0.06306467205286026, 'timestamp': '2025-10-01 04:39:13.775180', 'step': 15984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:13.833866', 'step': 15984, 'epoch': 3} {'type': 'loss', 'content': 0.07816506922245026, 'timestamp': '2025-10-01 04:39:13.836340', 'step': 15985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:13.892674', 'step': 15985, 'epoch': 3} {'type': 'loss', 'content': 0.13497425615787506, 'timestamp': '2025-10-01 04:39:13.894888', 'step': 15986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:39:13.966720', 'step': 15986, 'epoch': 3} {'type': 'loss', 'content': 0.11113949120044708, 'timestamp': '2025-10-01 04:39:13.975762', 'step': 15987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:14.030002', 'step': 15987, 'epoch': 3} {'type': 'loss', 'content': 0.08289410918951035, 'timestamp': '2025-10-01 04:39:14.036203', 'step': 15988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:14.091187', 'step': 15988, 'epoch': 3} {'type': 'loss', 'content': 0.10938332229852676, 'timestamp': '2025-10-01 04:39:14.094032', 'step': 15989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:14.153258', 'step': 15989, 'epoch': 3} {'type': 'loss', 'content': 0.13802547752857208, 'timestamp': '2025-10-01 04:39:14.155669', 'step': 15990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:14.211214', 'step': 15990, 'epoch': 3} {'type': 'loss', 'content': 0.08399131149053574, 'timestamp': '2025-10-01 04:39:14.214257', 'step': 15991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:14.268363', 'step': 15991, 'epoch': 3} {'type': 'loss', 'content': 0.09089603275060654, 'timestamp': '2025-10-01 04:39:14.275036', 'step': 15992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:14.330079', 'step': 15992, 'epoch': 3} {'type': 'loss', 'content': 0.11999128013849258, 'timestamp': '2025-10-01 04:39:14.332157', 'step': 15993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:14.388492', 'step': 15993, 'epoch': 3} {'type': 'loss', 'content': 0.12005796283483505, 'timestamp': '2025-10-01 04:39:14.391012', 'step': 15994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:14.451145', 'step': 15994, 'epoch': 3} {'type': 'loss', 'content': 0.1145281046628952, 'timestamp': '2025-10-01 04:39:14.453593', 'step': 15995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:14.507657', 'step': 15995, 'epoch': 3} {'type': 'loss', 'content': 0.09707359224557877, 'timestamp': '2025-10-01 04:39:14.515207', 'step': 15996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:14.569246', 'step': 15996, 'epoch': 3} {'type': 'loss', 'content': 0.1324974149465561, 'timestamp': '2025-10-01 04:39:14.574050', 'step': 15997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:14.629332', 'step': 15997, 'epoch': 3} {'type': 'loss', 'content': 0.0636126920580864, 'timestamp': '2025-10-01 04:39:14.637951', 'step': 15998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:14.693840', 'step': 15998, 'epoch': 3} {'type': 'loss', 'content': 0.03604799136519432, 'timestamp': '2025-10-01 04:39:14.696199', 'step': 15999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:14.752592', 'step': 15999, 'epoch': 3} {'type': 'loss', 'content': 0.2013605237007141, 'timestamp': '2025-10-01 04:39:14.758610', 'step': 16000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 16000', 'timestamp': '2025-10-01 04:39:15.127409', 'step': 16000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:39:15.183384', 'step': 16000, 'epoch': 3} {'type': 'loss', 'content': 0.06351710855960846, 'timestamp': '2025-10-01 04:39:15.186907', 'step': 16001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:15.241050', 'step': 16001, 'epoch': 3} {'type': 'loss', 'content': 0.0406506285071373, 'timestamp': '2025-10-01 04:39:15.243038', 'step': 16002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:15.304963', 'step': 16002, 'epoch': 3} {'type': 'loss', 'content': 0.2031736820936203, 'timestamp': '2025-10-01 04:39:15.307299', 'step': 16003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:15.361619', 'step': 16003, 'epoch': 3} {'type': 'loss', 'content': 0.058358218520879745, 'timestamp': '2025-10-01 04:39:15.368218', 'step': 16004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:15.423866', 'step': 16004, 'epoch': 3} {'type': 'loss', 'content': 0.12047581374645233, 'timestamp': '2025-10-01 04:39:15.429485', 'step': 16005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:39:15.484292', 'step': 16005, 'epoch': 3} {'type': 'loss', 'content': 0.07543478906154633, 'timestamp': '2025-10-01 04:39:15.486478', 'step': 16006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:15.541176', 'step': 16006, 'epoch': 3} {'type': 'loss', 'content': 0.1246655210852623, 'timestamp': '2025-10-01 04:39:15.543553', 'step': 16007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:15.603052', 'step': 16007, 'epoch': 3} {'type': 'loss', 'content': 0.07151290774345398, 'timestamp': '2025-10-01 04:39:15.609283', 'step': 16008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:39:15.665130', 'step': 16008, 'epoch': 3} {'type': 'loss', 'content': 0.05131194368004799, 'timestamp': '2025-10-01 04:39:15.667278', 'step': 16009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:15.722519', 'step': 16009, 'epoch': 3} {'type': 'loss', 'content': 0.10445216298103333, 'timestamp': '2025-10-01 04:39:15.727719', 'step': 16010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:15.782896', 'step': 16010, 'epoch': 3} {'type': 'loss', 'content': 0.076607845723629, 'timestamp': '2025-10-01 04:39:15.785394', 'step': 16011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:39:15.839812', 'step': 16011, 'epoch': 3} {'type': 'loss', 'content': 0.08177097141742706, 'timestamp': '2025-10-01 04:39:15.846345', 'step': 16012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:15.900824', 'step': 16012, 'epoch': 3} {'type': 'loss', 'content': 0.1242130771279335, 'timestamp': '2025-10-01 04:39:15.903114', 'step': 16013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:15.957201', 'step': 16013, 'epoch': 3} {'type': 'loss', 'content': 0.06171812489628792, 'timestamp': '2025-10-01 04:39:15.959756', 'step': 16014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:16.014294', 'step': 16014, 'epoch': 3} {'type': 'loss', 'content': 0.09794925153255463, 'timestamp': '2025-10-01 04:39:16.016964', 'step': 16015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:16.071167', 'step': 16015, 'epoch': 3} {'type': 'loss', 'content': 0.08394346386194229, 'timestamp': '2025-10-01 04:39:16.077422', 'step': 16016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:39:16.131307', 'step': 16016, 'epoch': 3} {'type': 'loss', 'content': 0.09898903965950012, 'timestamp': '2025-10-01 04:39:16.133789', 'step': 16017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:39:16.189278', 'step': 16017, 'epoch': 3} {'type': 'loss', 'content': 0.1269732564687729, 'timestamp': '2025-10-01 04:39:16.191487', 'step': 16018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:16.245900', 'step': 16018, 'epoch': 3} {'type': 'loss', 'content': 0.11814700812101364, 'timestamp': '2025-10-01 04:39:16.248061', 'step': 16019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:16.303622', 'step': 16019, 'epoch': 3} {'type': 'loss', 'content': 0.09499572962522507, 'timestamp': '2025-10-01 04:39:16.309987', 'step': 16020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:39:16.366209', 'step': 16020, 'epoch': 3} {'type': 'loss', 'content': 0.06069764867424965, 'timestamp': '2025-10-01 04:39:16.369078', 'step': 16021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:16.425874', 'step': 16021, 'epoch': 3} {'type': 'loss', 'content': 0.07780901342630386, 'timestamp': '2025-10-01 04:39:16.428125', 'step': 16022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:39:16.490797', 'step': 16022, 'epoch': 3} {'type': 'loss', 'content': 0.15206369757652283, 'timestamp': '2025-10-01 04:39:16.493104', 'step': 16023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:16.547613', 'step': 16023, 'epoch': 3} {'type': 'loss', 'content': 0.07639597356319427, 'timestamp': '2025-10-01 04:39:16.554278', 'step': 16024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:16.607913', 'step': 16024, 'epoch': 3} {'type': 'loss', 'content': 0.0931834951043129, 'timestamp': '2025-10-01 04:39:16.610026', 'step': 16025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:16.664698', 'step': 16025, 'epoch': 3} {'type': 'loss', 'content': 0.07269474118947983, 'timestamp': '2025-10-01 04:39:16.679270', 'step': 16026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:16.738832', 'step': 16026, 'epoch': 3} {'type': 'loss', 'content': 0.12864091992378235, 'timestamp': '2025-10-01 04:39:16.741702', 'step': 16027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:39:16.796524', 'step': 16027, 'epoch': 3} {'type': 'loss', 'content': 0.1467604786157608, 'timestamp': '2025-10-01 04:39:16.802714', 'step': 16028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:16.858694', 'step': 16028, 'epoch': 3} {'type': 'loss', 'content': 0.06883816421031952, 'timestamp': '2025-10-01 04:39:16.861609', 'step': 16029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:16.915826', 'step': 16029, 'epoch': 3} {'type': 'loss', 'content': 0.09414304792881012, 'timestamp': '2025-10-01 04:39:16.918593', 'step': 16030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:39:16.973140', 'step': 16030, 'epoch': 3} {'type': 'loss', 'content': 0.11036091297864914, 'timestamp': '2025-10-01 04:39:16.981467', 'step': 16031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:39:17.035491', 'step': 16031, 'epoch': 3} {'type': 'loss', 'content': 0.10914276540279388, 'timestamp': '2025-10-01 04:39:17.041549', 'step': 16032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:17.100730', 'step': 16032, 'epoch': 3} {'type': 'loss', 'content': 0.1380743533372879, 'timestamp': '2025-10-01 04:39:17.102954', 'step': 16033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:17.156640', 'step': 16033, 'epoch': 3} {'type': 'loss', 'content': 0.13707007467746735, 'timestamp': '2025-10-01 04:39:17.159085', 'step': 16034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:39:17.213981', 'step': 16034, 'epoch': 3} {'type': 'loss', 'content': 0.07862085849046707, 'timestamp': '2025-10-01 04:39:17.216675', 'step': 16035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:17.270416', 'step': 16035, 'epoch': 3} {'type': 'loss', 'content': 0.08127261698246002, 'timestamp': '2025-10-01 04:39:17.276175', 'step': 16036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:17.328217', 'step': 16036, 'epoch': 3} {'type': 'loss', 'content': 0.0666884332895279, 'timestamp': '2025-10-01 04:39:17.331137', 'step': 16037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:17.384848', 'step': 16037, 'epoch': 3} {'type': 'loss', 'content': 0.16021934151649475, 'timestamp': '2025-10-01 04:39:17.387122', 'step': 16038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:17.442360', 'step': 16038, 'epoch': 3} {'type': 'loss', 'content': 0.13652242720127106, 'timestamp': '2025-10-01 04:39:17.444559', 'step': 16039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:17.498207', 'step': 16039, 'epoch': 3} {'type': 'loss', 'content': 0.07286018878221512, 'timestamp': '2025-10-01 04:39:17.504145', 'step': 16040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:17.557629', 'step': 16040, 'epoch': 3} {'type': 'loss', 'content': 0.1371975839138031, 'timestamp': '2025-10-01 04:39:17.560034', 'step': 16041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:17.613246', 'step': 16041, 'epoch': 3} {'type': 'loss', 'content': 0.09624620527029037, 'timestamp': '2025-10-01 04:39:17.615635', 'step': 16042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:17.671036', 'step': 16042, 'epoch': 3} {'type': 'loss', 'content': 0.09068335592746735, 'timestamp': '2025-10-01 04:39:17.673444', 'step': 16043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:17.728341', 'step': 16043, 'epoch': 3} {'type': 'loss', 'content': 0.17131105065345764, 'timestamp': '2025-10-01 04:39:17.734706', 'step': 16044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:17.794947', 'step': 16044, 'epoch': 3} {'type': 'loss', 'content': 0.08303509652614594, 'timestamp': '2025-10-01 04:39:17.805173', 'step': 16045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:17.859444', 'step': 16045, 'epoch': 3} {'type': 'loss', 'content': 0.0869545117020607, 'timestamp': '2025-10-01 04:39:17.861598', 'step': 16046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:39:17.917254', 'step': 16046, 'epoch': 3} {'type': 'loss', 'content': 0.09919759631156921, 'timestamp': '2025-10-01 04:39:17.919568', 'step': 16047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:17.973535', 'step': 16047, 'epoch': 3} {'type': 'loss', 'content': 0.11508754640817642, 'timestamp': '2025-10-01 04:39:17.981186', 'step': 16048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:18.033593', 'step': 16048, 'epoch': 3} {'type': 'loss', 'content': 0.10201258957386017, 'timestamp': '2025-10-01 04:39:18.036254', 'step': 16049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:18.089234', 'step': 16049, 'epoch': 3} {'type': 'loss', 'content': 0.10306421667337418, 'timestamp': '2025-10-01 04:39:18.091669', 'step': 16050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:39:18.155214', 'step': 16050, 'epoch': 3} {'type': 'loss', 'content': 0.11844097822904587, 'timestamp': '2025-10-01 04:39:18.157786', 'step': 16051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:18.211248', 'step': 16051, 'epoch': 3} {'type': 'loss', 'content': 0.10956327617168427, 'timestamp': '2025-10-01 04:39:18.218469', 'step': 16052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:18.271983', 'step': 16052, 'epoch': 3} {'type': 'loss', 'content': 0.17773401737213135, 'timestamp': '2025-10-01 04:39:18.274601', 'step': 16053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:18.329545', 'step': 16053, 'epoch': 3} {'type': 'loss', 'content': 0.045300599187612534, 'timestamp': '2025-10-01 04:39:18.332068', 'step': 16054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:39:18.393598', 'step': 16054, 'epoch': 3} {'type': 'loss', 'content': 0.09525671601295471, 'timestamp': '2025-10-01 04:39:18.396304', 'step': 16055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:18.451257', 'step': 16055, 'epoch': 3} {'type': 'loss', 'content': 0.20583027601242065, 'timestamp': '2025-10-01 04:39:18.458099', 'step': 16056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:18.513320', 'step': 16056, 'epoch': 3} {'type': 'loss', 'content': 0.07159782201051712, 'timestamp': '2025-10-01 04:39:18.515827', 'step': 16057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:18.569772', 'step': 16057, 'epoch': 3} {'type': 'loss', 'content': 0.1386829912662506, 'timestamp': '2025-10-01 04:39:18.572096', 'step': 16058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:39:18.627936', 'step': 16058, 'epoch': 3} {'type': 'loss', 'content': 0.09866857528686523, 'timestamp': '2025-10-01 04:39:18.637496', 'step': 16059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:18.691844', 'step': 16059, 'epoch': 3} {'type': 'loss', 'content': 0.04825543239712715, 'timestamp': '2025-10-01 04:39:18.698884', 'step': 16060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:18.753896', 'step': 16060, 'epoch': 3} {'type': 'loss', 'content': 0.0694180279970169, 'timestamp': '2025-10-01 04:39:18.755917', 'step': 16061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:18.809039', 'step': 16061, 'epoch': 3} {'type': 'loss', 'content': 0.09838542342185974, 'timestamp': '2025-10-01 04:39:18.810879', 'step': 16062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:39:18.864363', 'step': 16062, 'epoch': 3} {'type': 'loss', 'content': 0.08931806683540344, 'timestamp': '2025-10-01 04:39:18.866555', 'step': 16063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:18.919643', 'step': 16063, 'epoch': 3} {'type': 'loss', 'content': 0.10988417267799377, 'timestamp': '2025-10-01 04:39:18.925977', 'step': 16064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:39:18.988764', 'step': 16064, 'epoch': 3} {'type': 'loss', 'content': 0.10314623266458511, 'timestamp': '2025-10-01 04:39:18.990764', 'step': 16065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:19.043242', 'step': 16065, 'epoch': 3} {'type': 'loss', 'content': 0.05386511608958244, 'timestamp': '2025-10-01 04:39:19.045782', 'step': 16066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:19.099165', 'step': 16066, 'epoch': 3} {'type': 'loss', 'content': 0.1014927551150322, 'timestamp': '2025-10-01 04:39:19.101441', 'step': 16067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:19.155349', 'step': 16067, 'epoch': 3} {'type': 'loss', 'content': 0.07652676850557327, 'timestamp': '2025-10-01 04:39:19.161321', 'step': 16068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:19.215145', 'step': 16068, 'epoch': 3} {'type': 'loss', 'content': 0.1681567281484604, 'timestamp': '2025-10-01 04:39:19.217252', 'step': 16069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:19.271235', 'step': 16069, 'epoch': 3} {'type': 'loss', 'content': 0.077551931142807, 'timestamp': '2025-10-01 04:39:19.273550', 'step': 16070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:19.326868', 'step': 16070, 'epoch': 3} {'type': 'loss', 'content': 0.08235988020896912, 'timestamp': '2025-10-01 04:39:19.329280', 'step': 16071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:19.383547', 'step': 16071, 'epoch': 3} {'type': 'loss', 'content': 0.048616647720336914, 'timestamp': '2025-10-01 04:39:19.389599', 'step': 16072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:19.451418', 'step': 16072, 'epoch': 3} {'type': 'loss', 'content': 0.0795910432934761, 'timestamp': '2025-10-01 04:39:19.453903', 'step': 16073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:19.507582', 'step': 16073, 'epoch': 3} {'type': 'loss', 'content': 0.08584170788526535, 'timestamp': '2025-10-01 04:39:19.509874', 'step': 16074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:19.564879', 'step': 16074, 'epoch': 3} {'type': 'loss', 'content': 0.07972817867994308, 'timestamp': '2025-10-01 04:39:19.567113', 'step': 16075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-01 04:39:19.620818', 'step': 16075, 'epoch': 3} {'type': 'loss', 'content': 0.10584084689617157, 'timestamp': '2025-10-01 04:39:19.627461', 'step': 16076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:39:19.680840', 'step': 16076, 'epoch': 3} {'type': 'loss', 'content': 0.12222088873386383, 'timestamp': '2025-10-01 04:39:19.683350', 'step': 16077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:19.748655', 'step': 16077, 'epoch': 3} {'type': 'loss', 'content': 0.15013791620731354, 'timestamp': '2025-10-01 04:39:19.751257', 'step': 16078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:19.804938', 'step': 16078, 'epoch': 3} {'type': 'loss', 'content': 0.05136250704526901, 'timestamp': '2025-10-01 04:39:19.813294', 'step': 16079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:19.866763', 'step': 16079, 'epoch': 3} {'type': 'loss', 'content': 0.07555253803730011, 'timestamp': '2025-10-01 04:39:19.872882', 'step': 16080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:19.925962', 'step': 16080, 'epoch': 3} {'type': 'loss', 'content': 0.0946323424577713, 'timestamp': '2025-10-01 04:39:19.928319', 'step': 16081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:39:19.981759', 'step': 16081, 'epoch': 3} {'type': 'loss', 'content': 0.1333777755498886, 'timestamp': '2025-10-01 04:39:19.984042', 'step': 16082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:20.042815', 'step': 16082, 'epoch': 3} {'type': 'loss', 'content': 0.049555450677871704, 'timestamp': '2025-10-01 04:39:20.045189', 'step': 16083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:39:20.099287', 'step': 16083, 'epoch': 3} {'type': 'loss', 'content': 0.04737605154514313, 'timestamp': '2025-10-01 04:39:20.105771', 'step': 16084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:20.159409', 'step': 16084, 'epoch': 3} {'type': 'loss', 'content': 0.10597763955593109, 'timestamp': '2025-10-01 04:39:20.164223', 'step': 16085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:20.218741', 'step': 16085, 'epoch': 3} {'type': 'loss', 'content': 0.11261005699634552, 'timestamp': '2025-10-01 04:39:20.220873', 'step': 16086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:20.274866', 'step': 16086, 'epoch': 3} {'type': 'loss', 'content': 0.09067793190479279, 'timestamp': '2025-10-01 04:39:20.288258', 'step': 16087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:20.341222', 'step': 16087, 'epoch': 3} {'type': 'loss', 'content': 0.042467884719371796, 'timestamp': '2025-10-01 04:39:20.347617', 'step': 16088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:20.401350', 'step': 16088, 'epoch': 3} {'type': 'loss', 'content': 0.054793938994407654, 'timestamp': '2025-10-01 04:39:20.403577', 'step': 16089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:20.457267', 'step': 16089, 'epoch': 3} {'type': 'loss', 'content': 0.15764667093753815, 'timestamp': '2025-10-01 04:39:20.459583', 'step': 16090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:20.519130', 'step': 16090, 'epoch': 3} {'type': 'loss', 'content': 0.09655817598104477, 'timestamp': '2025-10-01 04:39:20.521355', 'step': 16091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:20.577018', 'step': 16091, 'epoch': 3} {'type': 'loss', 'content': 0.1373719573020935, 'timestamp': '2025-10-01 04:39:20.583652', 'step': 16092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:20.639890', 'step': 16092, 'epoch': 3} {'type': 'loss', 'content': 0.06967589259147644, 'timestamp': '2025-10-01 04:39:20.642343', 'step': 16093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:39:20.697724', 'step': 16093, 'epoch': 3} {'type': 'loss', 'content': 0.1034274697303772, 'timestamp': '2025-10-01 04:39:20.700037', 'step': 16094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:20.755708', 'step': 16094, 'epoch': 3} {'type': 'loss', 'content': 0.09618839621543884, 'timestamp': '2025-10-01 04:39:20.758206', 'step': 16095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:20.819991', 'step': 16095, 'epoch': 3} {'type': 'loss', 'content': 0.14421185851097107, 'timestamp': '2025-10-01 04:39:20.826428', 'step': 16096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:20.880449', 'step': 16096, 'epoch': 3} {'type': 'loss', 'content': 0.0703643262386322, 'timestamp': '2025-10-01 04:39:20.883484', 'step': 16097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:20.937191', 'step': 16097, 'epoch': 3} {'type': 'loss', 'content': 0.0870649665594101, 'timestamp': '2025-10-01 04:39:20.939586', 'step': 16098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:20.994346', 'step': 16098, 'epoch': 3} {'type': 'loss', 'content': 0.09477464109659195, 'timestamp': '2025-10-01 04:39:20.996776', 'step': 16099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:21.051190', 'step': 16099, 'epoch': 3} {'type': 'loss', 'content': 0.12388801574707031, 'timestamp': '2025-10-01 04:39:21.057617', 'step': 16100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:21.111243', 'step': 16100, 'epoch': 3} {'type': 'loss', 'content': 0.08464039117097855, 'timestamp': '2025-10-01 04:39:21.113463', 'step': 16101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:21.167212', 'step': 16101, 'epoch': 3} {'type': 'loss', 'content': 0.07676313072443008, 'timestamp': '2025-10-01 04:39:21.169728', 'step': 16102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:21.223528', 'step': 16102, 'epoch': 3} {'type': 'loss', 'content': 0.12181715667247772, 'timestamp': '2025-10-01 04:39:21.226293', 'step': 16103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:39:21.280278', 'step': 16103, 'epoch': 3} {'type': 'loss', 'content': 0.07126961648464203, 'timestamp': '2025-10-01 04:39:21.286265', 'step': 16104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:39:21.338822', 'step': 16104, 'epoch': 3} {'type': 'loss', 'content': 0.028045538812875748, 'timestamp': '2025-10-01 04:39:21.341522', 'step': 16105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:21.395027', 'step': 16105, 'epoch': 3} {'type': 'loss', 'content': 0.060781046748161316, 'timestamp': '2025-10-01 04:39:21.397445', 'step': 16106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:21.450344', 'step': 16106, 'epoch': 3} {'type': 'loss', 'content': 0.15401431918144226, 'timestamp': '2025-10-01 04:39:21.453096', 'step': 16107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:21.506707', 'step': 16107, 'epoch': 3} {'type': 'loss', 'content': 0.134299635887146, 'timestamp': '2025-10-01 04:39:21.512668', 'step': 16108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:21.565785', 'step': 16108, 'epoch': 3} {'type': 'loss', 'content': 0.058106470853090286, 'timestamp': '2025-10-01 04:39:21.567992', 'step': 16109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:21.622571', 'step': 16109, 'epoch': 3} {'type': 'loss', 'content': 0.08548721671104431, 'timestamp': '2025-10-01 04:39:21.624782', 'step': 16110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:21.679755', 'step': 16110, 'epoch': 3} {'type': 'loss', 'content': 0.14452475309371948, 'timestamp': '2025-10-01 04:39:21.681671', 'step': 16111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:21.735550', 'step': 16111, 'epoch': 3} {'type': 'loss', 'content': 0.03319305554032326, 'timestamp': '2025-10-01 04:39:21.743336', 'step': 16112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:21.798478', 'step': 16112, 'epoch': 3} {'type': 'loss', 'content': 0.11886754631996155, 'timestamp': '2025-10-01 04:39:21.802151', 'step': 16113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:21.856595', 'step': 16113, 'epoch': 3} {'type': 'loss', 'content': 0.1255621314048767, 'timestamp': '2025-10-01 04:39:21.859003', 'step': 16114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:21.912933', 'step': 16114, 'epoch': 3} {'type': 'loss', 'content': 0.10141915082931519, 'timestamp': '2025-10-01 04:39:21.915241', 'step': 16115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:21.970270', 'step': 16115, 'epoch': 3} {'type': 'loss', 'content': 0.08529892563819885, 'timestamp': '2025-10-01 04:39:21.976562', 'step': 16116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:22.029451', 'step': 16116, 'epoch': 3} {'type': 'loss', 'content': 0.0889650285243988, 'timestamp': '2025-10-01 04:39:22.032392', 'step': 16117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:22.085803', 'step': 16117, 'epoch': 3} {'type': 'loss', 'content': 0.0993029773235321, 'timestamp': '2025-10-01 04:39:22.088067', 'step': 16118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:22.141694', 'step': 16118, 'epoch': 3} {'type': 'loss', 'content': 0.055736906826496124, 'timestamp': '2025-10-01 04:39:22.143983', 'step': 16119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:22.197461', 'step': 16119, 'epoch': 3} {'type': 'loss', 'content': 0.08380205184221268, 'timestamp': '2025-10-01 04:39:22.203612', 'step': 16120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:22.256349', 'step': 16120, 'epoch': 3} {'type': 'loss', 'content': 0.026299800723791122, 'timestamp': '2025-10-01 04:39:22.258494', 'step': 16121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:22.311728', 'step': 16121, 'epoch': 3} {'type': 'loss', 'content': 0.062031641602516174, 'timestamp': '2025-10-01 04:39:22.313903', 'step': 16122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:22.368346', 'step': 16122, 'epoch': 3} {'type': 'loss', 'content': 0.07263186573982239, 'timestamp': '2025-10-01 04:39:22.370554', 'step': 16123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:22.430284', 'step': 16123, 'epoch': 3} {'type': 'loss', 'content': 0.10242468863725662, 'timestamp': '2025-10-01 04:39:22.436280', 'step': 16124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:22.490211', 'step': 16124, 'epoch': 3} {'type': 'loss', 'content': 0.08636215329170227, 'timestamp': '2025-10-01 04:39:22.492404', 'step': 16125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:39:22.545724', 'step': 16125, 'epoch': 3} {'type': 'loss', 'content': 0.17403237521648407, 'timestamp': '2025-10-01 04:39:22.547967', 'step': 16126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:22.602548', 'step': 16126, 'epoch': 3} {'type': 'loss', 'content': 0.0869765505194664, 'timestamp': '2025-10-01 04:39:22.604799', 'step': 16127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:22.658030', 'step': 16127, 'epoch': 3} {'type': 'loss', 'content': 0.06356880813837051, 'timestamp': '2025-10-01 04:39:22.663992', 'step': 16128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:22.718605', 'step': 16128, 'epoch': 3} {'type': 'loss', 'content': 0.06345593929290771, 'timestamp': '2025-10-01 04:39:22.720879', 'step': 16129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:39:22.778472', 'step': 16129, 'epoch': 3} {'type': 'loss', 'content': 0.17631344497203827, 'timestamp': '2025-10-01 04:39:22.780779', 'step': 16130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:22.834349', 'step': 16130, 'epoch': 3} {'type': 'loss', 'content': 0.037085723131895065, 'timestamp': '2025-10-01 04:39:22.836520', 'step': 16131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-01 04:39:22.894721', 'step': 16131, 'epoch': 3} {'type': 'loss', 'content': 0.13584677875041962, 'timestamp': '2025-10-01 04:39:22.900567', 'step': 16132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:22.953467', 'step': 16132, 'epoch': 3} {'type': 'loss', 'content': 0.058493729680776596, 'timestamp': '2025-10-01 04:39:22.955868', 'step': 16133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:23.015064', 'step': 16133, 'epoch': 3} {'type': 'loss', 'content': 0.0565425269305706, 'timestamp': '2025-10-01 04:39:23.018479', 'step': 16134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:23.071959', 'step': 16134, 'epoch': 3} {'type': 'loss', 'content': 0.113428495824337, 'timestamp': '2025-10-01 04:39:23.074350', 'step': 16135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:23.127484', 'step': 16135, 'epoch': 3} {'type': 'loss', 'content': 0.11089848726987839, 'timestamp': '2025-10-01 04:39:23.133350', 'step': 16136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:23.185951', 'step': 16136, 'epoch': 3} {'type': 'loss', 'content': 0.08363485336303711, 'timestamp': '2025-10-01 04:39:23.188312', 'step': 16137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:23.242140', 'step': 16137, 'epoch': 3} {'type': 'loss', 'content': 0.09547083079814911, 'timestamp': '2025-10-01 04:39:23.244345', 'step': 16138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:23.299478', 'step': 16138, 'epoch': 3} {'type': 'loss', 'content': 0.09301231056451797, 'timestamp': '2025-10-01 04:39:23.301676', 'step': 16139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:39:23.356036', 'step': 16139, 'epoch': 3} {'type': 'loss', 'content': 0.0866161435842514, 'timestamp': '2025-10-01 04:39:23.361936', 'step': 16140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:23.415533', 'step': 16140, 'epoch': 3} {'type': 'loss', 'content': 0.12945809960365295, 'timestamp': '2025-10-01 04:39:23.417850', 'step': 16141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:23.472387', 'step': 16141, 'epoch': 3} {'type': 'loss', 'content': 0.08810992538928986, 'timestamp': '2025-10-01 04:39:23.475681', 'step': 16142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:23.530210', 'step': 16142, 'epoch': 3} {'type': 'loss', 'content': 0.069771908223629, 'timestamp': '2025-10-01 04:39:23.533162', 'step': 16143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:23.588344', 'step': 16143, 'epoch': 3} {'type': 'loss', 'content': 0.05644955113530159, 'timestamp': '2025-10-01 04:39:23.595785', 'step': 16144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:23.649411', 'step': 16144, 'epoch': 3} {'type': 'loss', 'content': 0.11348214745521545, 'timestamp': '2025-10-01 04:39:23.651742', 'step': 16145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:23.705006', 'step': 16145, 'epoch': 3} {'type': 'loss', 'content': 0.1824716478586197, 'timestamp': '2025-10-01 04:39:23.707640', 'step': 16146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:23.761948', 'step': 16146, 'epoch': 3} {'type': 'loss', 'content': 0.09458053112030029, 'timestamp': '2025-10-01 04:39:23.764212', 'step': 16147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:23.817492', 'step': 16147, 'epoch': 3} {'type': 'loss', 'content': 0.15101872384548187, 'timestamp': '2025-10-01 04:39:23.823551', 'step': 16148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:23.880601', 'step': 16148, 'epoch': 3} {'type': 'loss', 'content': 0.07509180158376694, 'timestamp': '2025-10-01 04:39:23.882904', 'step': 16149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:23.943449', 'step': 16149, 'epoch': 3} {'type': 'loss', 'content': 0.1351127028465271, 'timestamp': '2025-10-01 04:39:23.945693', 'step': 16150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:39:23.999699', 'step': 16150, 'epoch': 3} {'type': 'loss', 'content': 0.1969470977783203, 'timestamp': '2025-10-01 04:39:24.001791', 'step': 16151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:24.055854', 'step': 16151, 'epoch': 3} {'type': 'loss', 'content': 0.13378377258777618, 'timestamp': '2025-10-01 04:39:24.061952', 'step': 16152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:24.120191', 'step': 16152, 'epoch': 3} {'type': 'loss', 'content': 0.0781652182340622, 'timestamp': '2025-10-01 04:39:24.122344', 'step': 16153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:24.180657', 'step': 16153, 'epoch': 3} {'type': 'loss', 'content': 0.10001739114522934, 'timestamp': '2025-10-01 04:39:24.183590', 'step': 16154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:24.245225', 'step': 16154, 'epoch': 3} {'type': 'loss', 'content': 0.06595177948474884, 'timestamp': '2025-10-01 04:39:24.247337', 'step': 16155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:24.300405', 'step': 16155, 'epoch': 3} {'type': 'loss', 'content': 0.058888208121061325, 'timestamp': '2025-10-01 04:39:24.306546', 'step': 16156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:24.359247', 'step': 16156, 'epoch': 3} {'type': 'loss', 'content': 0.11525560915470123, 'timestamp': '2025-10-01 04:39:24.361455', 'step': 16157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-01 04:39:24.417535', 'step': 16157, 'epoch': 3} {'type': 'loss', 'content': 0.190518319606781, 'timestamp': '2025-10-01 04:39:24.423908', 'step': 16158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:24.478396', 'step': 16158, 'epoch': 3} {'type': 'loss', 'content': 0.050260379910469055, 'timestamp': '2025-10-01 04:39:24.480629', 'step': 16159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:24.534679', 'step': 16159, 'epoch': 3} {'type': 'loss', 'content': 0.14516735076904297, 'timestamp': '2025-10-01 04:39:24.540803', 'step': 16160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:24.594254', 'step': 16160, 'epoch': 3} {'type': 'loss', 'content': 0.15813259780406952, 'timestamp': '2025-10-01 04:39:24.596799', 'step': 16161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:24.649658', 'step': 16161, 'epoch': 3} {'type': 'loss', 'content': 0.08812170475721359, 'timestamp': '2025-10-01 04:39:24.652026', 'step': 16162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:24.705519', 'step': 16162, 'epoch': 3} {'type': 'loss', 'content': 0.07092294842004776, 'timestamp': '2025-10-01 04:39:24.708498', 'step': 16163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:24.763624', 'step': 16163, 'epoch': 3} {'type': 'loss', 'content': 0.10800568759441376, 'timestamp': '2025-10-01 04:39:24.771257', 'step': 16164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:24.825827', 'step': 16164, 'epoch': 3} {'type': 'loss', 'content': 0.0879257321357727, 'timestamp': '2025-10-01 04:39:24.828563', 'step': 16165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:39:24.882633', 'step': 16165, 'epoch': 3} {'type': 'loss', 'content': 0.062407299876213074, 'timestamp': '2025-10-01 04:39:24.885255', 'step': 16166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:24.939749', 'step': 16166, 'epoch': 3} {'type': 'loss', 'content': 0.069928377866745, 'timestamp': '2025-10-01 04:39:24.942360', 'step': 16167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:24.996794', 'step': 16167, 'epoch': 3} {'type': 'loss', 'content': 0.0709662064909935, 'timestamp': '2025-10-01 04:39:25.002975', 'step': 16168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:25.056792', 'step': 16168, 'epoch': 3} {'type': 'loss', 'content': 0.14493238925933838, 'timestamp': '2025-10-01 04:39:25.059593', 'step': 16169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:25.115781', 'step': 16169, 'epoch': 3} {'type': 'loss', 'content': 0.10088741779327393, 'timestamp': '2025-10-01 04:39:25.118726', 'step': 16170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:25.177869', 'step': 16170, 'epoch': 3} {'type': 'loss', 'content': 0.08147116005420685, 'timestamp': '2025-10-01 04:39:25.180345', 'step': 16171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:25.237741', 'step': 16171, 'epoch': 3} {'type': 'loss', 'content': 0.09489927440881729, 'timestamp': '2025-10-01 04:39:25.244061', 'step': 16172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:25.302152', 'step': 16172, 'epoch': 3} {'type': 'loss', 'content': 0.1277828812599182, 'timestamp': '2025-10-01 04:39:25.304529', 'step': 16173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:25.358620', 'step': 16173, 'epoch': 3} {'type': 'loss', 'content': 0.09887683391571045, 'timestamp': '2025-10-01 04:39:25.361186', 'step': 16174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:25.424148', 'step': 16174, 'epoch': 3} {'type': 'loss', 'content': 0.0933128073811531, 'timestamp': '2025-10-01 04:39:25.426353', 'step': 16175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:25.484756', 'step': 16175, 'epoch': 3} {'type': 'loss', 'content': 0.10802377015352249, 'timestamp': '2025-10-01 04:39:25.490691', 'step': 16176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:25.543718', 'step': 16176, 'epoch': 3} {'type': 'loss', 'content': 0.0591496117413044, 'timestamp': '2025-10-01 04:39:25.546341', 'step': 16177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:25.600451', 'step': 16177, 'epoch': 3} {'type': 'loss', 'content': 0.12578043341636658, 'timestamp': '2025-10-01 04:39:25.604669', 'step': 16178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:25.661380', 'step': 16178, 'epoch': 3} {'type': 'loss', 'content': 0.12733279168605804, 'timestamp': '2025-10-01 04:39:25.663888', 'step': 16179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:25.718536', 'step': 16179, 'epoch': 3} {'type': 'loss', 'content': 0.11231228709220886, 'timestamp': '2025-10-01 04:39:25.724858', 'step': 16180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:39:25.779271', 'step': 16180, 'epoch': 3} {'type': 'loss', 'content': 0.10178610682487488, 'timestamp': '2025-10-01 04:39:25.781689', 'step': 16181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:25.835488', 'step': 16181, 'epoch': 3} {'type': 'loss', 'content': 0.17463991045951843, 'timestamp': '2025-10-01 04:39:25.838221', 'step': 16182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:25.895254', 'step': 16182, 'epoch': 3} {'type': 'loss', 'content': 0.09480185806751251, 'timestamp': '2025-10-01 04:39:25.898537', 'step': 16183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:25.952863', 'step': 16183, 'epoch': 3} {'type': 'loss', 'content': 0.049851641058921814, 'timestamp': '2025-10-01 04:39:25.963288', 'step': 16184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:26.017037', 'step': 16184, 'epoch': 3} {'type': 'loss', 'content': 0.09550616145133972, 'timestamp': '2025-10-01 04:39:26.020301', 'step': 16185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:26.075232', 'step': 16185, 'epoch': 3} {'type': 'loss', 'content': 0.11130006611347198, 'timestamp': '2025-10-01 04:39:26.078985', 'step': 16186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:26.133964', 'step': 16186, 'epoch': 3} {'type': 'loss', 'content': 0.1548086553812027, 'timestamp': '2025-10-01 04:39:26.136364', 'step': 16187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:26.194942', 'step': 16187, 'epoch': 3} {'type': 'loss', 'content': 0.021608982235193253, 'timestamp': '2025-10-01 04:39:26.200919', 'step': 16188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:26.255568', 'step': 16188, 'epoch': 3} {'type': 'loss', 'content': 0.07230280339717865, 'timestamp': '2025-10-01 04:39:26.264409', 'step': 16189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:26.317729', 'step': 16189, 'epoch': 3} {'type': 'loss', 'content': 0.07906349003314972, 'timestamp': '2025-10-01 04:39:26.320587', 'step': 16190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:26.374885', 'step': 16190, 'epoch': 3} {'type': 'loss', 'content': 0.11088965833187103, 'timestamp': '2025-10-01 04:39:26.377400', 'step': 16191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:39:26.434215', 'step': 16191, 'epoch': 3} {'type': 'loss', 'content': 0.06908223778009415, 'timestamp': '2025-10-01 04:39:26.440114', 'step': 16192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:26.495462', 'step': 16192, 'epoch': 3} {'type': 'loss', 'content': 0.13750874996185303, 'timestamp': '2025-10-01 04:39:26.498089', 'step': 16193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:26.553131', 'step': 16193, 'epoch': 3} {'type': 'loss', 'content': 0.05587447062134743, 'timestamp': '2025-10-01 04:39:26.556036', 'step': 16194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:26.611822', 'step': 16194, 'epoch': 3} {'type': 'loss', 'content': 0.05740778520703316, 'timestamp': '2025-10-01 04:39:26.620341', 'step': 16195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:26.674670', 'step': 16195, 'epoch': 3} {'type': 'loss', 'content': 0.10274787247180939, 'timestamp': '2025-10-01 04:39:26.680611', 'step': 16196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:26.733934', 'step': 16196, 'epoch': 3} {'type': 'loss', 'content': 0.15438894927501678, 'timestamp': '2025-10-01 04:39:26.736221', 'step': 16197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:26.790347', 'step': 16197, 'epoch': 3} {'type': 'loss', 'content': 0.12241887301206589, 'timestamp': '2025-10-01 04:39:26.792774', 'step': 16198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:26.850231', 'step': 16198, 'epoch': 3} {'type': 'loss', 'content': 0.09835251420736313, 'timestamp': '2025-10-01 04:39:26.852777', 'step': 16199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:26.910460', 'step': 16199, 'epoch': 3} {'type': 'loss', 'content': 0.11268960684537888, 'timestamp': '2025-10-01 04:39:26.916691', 'step': 16200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:26.971499', 'step': 16200, 'epoch': 3} {'type': 'loss', 'content': 0.10090308636426926, 'timestamp': '2025-10-01 04:39:26.973804', 'step': 16201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:27.027289', 'step': 16201, 'epoch': 3} {'type': 'loss', 'content': 0.10655342042446136, 'timestamp': '2025-10-01 04:39:27.029492', 'step': 16202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:27.083880', 'step': 16202, 'epoch': 3} {'type': 'loss', 'content': 0.11855604499578476, 'timestamp': '2025-10-01 04:39:27.086336', 'step': 16203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:27.148856', 'step': 16203, 'epoch': 3} {'type': 'loss', 'content': 0.09897670149803162, 'timestamp': '2025-10-01 04:39:27.154876', 'step': 16204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:39:27.207722', 'step': 16204, 'epoch': 3} {'type': 'loss', 'content': 0.089186891913414, 'timestamp': '2025-10-01 04:39:27.211621', 'step': 16205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:27.272426', 'step': 16205, 'epoch': 3} {'type': 'loss', 'content': 0.14274321496486664, 'timestamp': '2025-10-01 04:39:27.275032', 'step': 16206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:27.328403', 'step': 16206, 'epoch': 3} {'type': 'loss', 'content': 0.13899366557598114, 'timestamp': '2025-10-01 04:39:27.330802', 'step': 16207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:27.384792', 'step': 16207, 'epoch': 3} {'type': 'loss', 'content': 0.08393919467926025, 'timestamp': '2025-10-01 04:39:27.391557', 'step': 16208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:27.444280', 'step': 16208, 'epoch': 3} {'type': 'loss', 'content': 0.12077916413545609, 'timestamp': '2025-10-01 04:39:27.446470', 'step': 16209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:27.499709', 'step': 16209, 'epoch': 3} {'type': 'loss', 'content': 0.11104068160057068, 'timestamp': '2025-10-01 04:39:27.501982', 'step': 16210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:27.555297', 'step': 16210, 'epoch': 3} {'type': 'loss', 'content': 0.1202421486377716, 'timestamp': '2025-10-01 04:39:27.557642', 'step': 16211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:27.610996', 'step': 16211, 'epoch': 3} {'type': 'loss', 'content': 0.06866694241762161, 'timestamp': '2025-10-01 04:39:27.617107', 'step': 16212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:27.669813', 'step': 16212, 'epoch': 3} {'type': 'loss', 'content': 0.06272164732217789, 'timestamp': '2025-10-01 04:39:27.673202', 'step': 16213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:27.726579', 'step': 16213, 'epoch': 3} {'type': 'loss', 'content': 0.06726160645484924, 'timestamp': '2025-10-01 04:39:27.729708', 'step': 16214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:39:27.784009', 'step': 16214, 'epoch': 3} {'type': 'loss', 'content': 0.09109564125537872, 'timestamp': '2025-10-01 04:39:27.786346', 'step': 16215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:27.840969', 'step': 16215, 'epoch': 3} {'type': 'loss', 'content': 0.12872572243213654, 'timestamp': '2025-10-01 04:39:27.846787', 'step': 16216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:27.900185', 'step': 16216, 'epoch': 3} {'type': 'loss', 'content': 0.056744348257780075, 'timestamp': '2025-10-01 04:39:27.902480', 'step': 16217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:27.955898', 'step': 16217, 'epoch': 3} {'type': 'loss', 'content': 0.06313572078943253, 'timestamp': '2025-10-01 04:39:27.959546', 'step': 16218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:28.024608', 'step': 16218, 'epoch': 3} {'type': 'loss', 'content': 0.08568236231803894, 'timestamp': '2025-10-01 04:39:28.026785', 'step': 16219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:28.081531', 'step': 16219, 'epoch': 3} {'type': 'loss', 'content': 0.05874687805771828, 'timestamp': '2025-10-01 04:39:28.087543', 'step': 16220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:28.141000', 'step': 16220, 'epoch': 3} {'type': 'loss', 'content': 0.19584625959396362, 'timestamp': '2025-10-01 04:39:28.143288', 'step': 16221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:28.196753', 'step': 16221, 'epoch': 3} {'type': 'loss', 'content': 0.09524136781692505, 'timestamp': '2025-10-01 04:39:28.199666', 'step': 16222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:28.252937', 'step': 16222, 'epoch': 3} {'type': 'loss', 'content': 0.15913020074367523, 'timestamp': '2025-10-01 04:39:28.255378', 'step': 16223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:28.308886', 'step': 16223, 'epoch': 3} {'type': 'loss', 'content': 0.11101111024618149, 'timestamp': '2025-10-01 04:39:28.314662', 'step': 16224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:28.367272', 'step': 16224, 'epoch': 3} {'type': 'loss', 'content': 0.12929852306842804, 'timestamp': '2025-10-01 04:39:28.369437', 'step': 16225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:28.422480', 'step': 16225, 'epoch': 3} {'type': 'loss', 'content': 0.17844213545322418, 'timestamp': '2025-10-01 04:39:28.424800', 'step': 16226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:28.479730', 'step': 16226, 'epoch': 3} {'type': 'loss', 'content': 0.10382808744907379, 'timestamp': '2025-10-01 04:39:28.481909', 'step': 16227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:28.534985', 'step': 16227, 'epoch': 3} {'type': 'loss', 'content': 0.10726168751716614, 'timestamp': '2025-10-01 04:39:28.541084', 'step': 16228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:28.594605', 'step': 16228, 'epoch': 3} {'type': 'loss', 'content': 0.05184634029865265, 'timestamp': '2025-10-01 04:39:28.597082', 'step': 16229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:28.650104', 'step': 16229, 'epoch': 3} {'type': 'loss', 'content': 0.06240324676036835, 'timestamp': '2025-10-01 04:39:28.652465', 'step': 16230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:28.705861', 'step': 16230, 'epoch': 3} {'type': 'loss', 'content': 0.08377750217914581, 'timestamp': '2025-10-01 04:39:28.708892', 'step': 16231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:39:28.762601', 'step': 16231, 'epoch': 3} {'type': 'loss', 'content': 0.07374927401542664, 'timestamp': '2025-10-01 04:39:28.768523', 'step': 16232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:28.821637', 'step': 16232, 'epoch': 3} {'type': 'loss', 'content': 0.12262503802776337, 'timestamp': '2025-10-01 04:39:28.823968', 'step': 16233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:39:28.878021', 'step': 16233, 'epoch': 3} {'type': 'loss', 'content': 0.08908263593912125, 'timestamp': '2025-10-01 04:39:28.880170', 'step': 16234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:39:28.933963', 'step': 16234, 'epoch': 3} {'type': 'loss', 'content': 0.04864240437746048, 'timestamp': '2025-10-01 04:39:28.936463', 'step': 16235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:28.993818', 'step': 16235, 'epoch': 3} {'type': 'loss', 'content': 0.08491400629281998, 'timestamp': '2025-10-01 04:39:29.003576', 'step': 16236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:29.062579', 'step': 16236, 'epoch': 3} {'type': 'loss', 'content': 0.05287374556064606, 'timestamp': '2025-10-01 04:39:29.064810', 'step': 16237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:29.118098', 'step': 16237, 'epoch': 3} {'type': 'loss', 'content': 0.07237366586923599, 'timestamp': '2025-10-01 04:39:29.120383', 'step': 16238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:39:29.174260', 'step': 16238, 'epoch': 3} {'type': 'loss', 'content': 0.08597156405448914, 'timestamp': '2025-10-01 04:39:29.176493', 'step': 16239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:29.229921', 'step': 16239, 'epoch': 3} {'type': 'loss', 'content': 0.029851006343960762, 'timestamp': '2025-10-01 04:39:29.235743', 'step': 16240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:29.288398', 'step': 16240, 'epoch': 3} {'type': 'loss', 'content': 0.11191532760858536, 'timestamp': '2025-10-01 04:39:29.290915', 'step': 16241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:39:29.349482', 'step': 16241, 'epoch': 3} {'type': 'loss', 'content': 0.07971442490816116, 'timestamp': '2025-10-01 04:39:29.351796', 'step': 16242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:29.405568', 'step': 16242, 'epoch': 3} {'type': 'loss', 'content': 0.08239321410655975, 'timestamp': '2025-10-01 04:39:29.408091', 'step': 16243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:39:29.461647', 'step': 16243, 'epoch': 3} {'type': 'loss', 'content': 0.06203838065266609, 'timestamp': '2025-10-01 04:39:29.467497', 'step': 16244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:29.531615', 'step': 16244, 'epoch': 3} {'type': 'loss', 'content': 0.15032906830310822, 'timestamp': '2025-10-01 04:39:29.534449', 'step': 16245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:29.587689', 'step': 16245, 'epoch': 3} {'type': 'loss', 'content': 0.0578126534819603, 'timestamp': '2025-10-01 04:39:29.590016', 'step': 16246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:29.643314', 'step': 16246, 'epoch': 3} {'type': 'loss', 'content': 0.11396806687116623, 'timestamp': '2025-10-01 04:39:29.645565', 'step': 16247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:29.702633', 'step': 16247, 'epoch': 3} {'type': 'loss', 'content': 0.07424979656934738, 'timestamp': '2025-10-01 04:39:29.708521', 'step': 16248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:29.761703', 'step': 16248, 'epoch': 3} {'type': 'loss', 'content': 0.10998599976301193, 'timestamp': '2025-10-01 04:39:29.763832', 'step': 16249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:29.817002', 'step': 16249, 'epoch': 3} {'type': 'loss', 'content': 0.053719282150268555, 'timestamp': '2025-10-01 04:39:29.819693', 'step': 16250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:29.873070', 'step': 16250, 'epoch': 3} {'type': 'loss', 'content': 0.07557863742113113, 'timestamp': '2025-10-01 04:39:29.875334', 'step': 16251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:29.934385', 'step': 16251, 'epoch': 3} {'type': 'loss', 'content': 0.1856832653284073, 'timestamp': '2025-10-01 04:39:29.940466', 'step': 16252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:29.993103', 'step': 16252, 'epoch': 3} {'type': 'loss', 'content': 0.11540500819683075, 'timestamp': '2025-10-01 04:39:29.995364', 'step': 16253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:39:30.050258', 'step': 16253, 'epoch': 3} {'type': 'loss', 'content': 0.10724137723445892, 'timestamp': '2025-10-01 04:39:30.052586', 'step': 16254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:30.110957', 'step': 16254, 'epoch': 3} {'type': 'loss', 'content': 0.12575362622737885, 'timestamp': '2025-10-01 04:39:30.113196', 'step': 16255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:30.166589', 'step': 16255, 'epoch': 3} {'type': 'loss', 'content': 0.0802014172077179, 'timestamp': '2025-10-01 04:39:30.172524', 'step': 16256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:39:30.225681', 'step': 16256, 'epoch': 3} {'type': 'loss', 'content': 0.052881356328725815, 'timestamp': '2025-10-01 04:39:30.228967', 'step': 16257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:30.284147', 'step': 16257, 'epoch': 3} {'type': 'loss', 'content': 0.045857131481170654, 'timestamp': '2025-10-01 04:39:30.286916', 'step': 16258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:30.342321', 'step': 16258, 'epoch': 3} {'type': 'loss', 'content': 0.10370397567749023, 'timestamp': '2025-10-01 04:39:30.345094', 'step': 16259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:30.398687', 'step': 16259, 'epoch': 3} {'type': 'loss', 'content': 0.03347369655966759, 'timestamp': '2025-10-01 04:39:30.404606', 'step': 16260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:30.457663', 'step': 16260, 'epoch': 3} {'type': 'loss', 'content': 0.10016000270843506, 'timestamp': '2025-10-01 04:39:30.460096', 'step': 16261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:30.514028', 'step': 16261, 'epoch': 3} {'type': 'loss', 'content': 0.09474489092826843, 'timestamp': '2025-10-01 04:39:30.516299', 'step': 16262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:30.570140', 'step': 16262, 'epoch': 3} {'type': 'loss', 'content': 0.05793437361717224, 'timestamp': '2025-10-01 04:39:30.572443', 'step': 16263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:30.626568', 'step': 16263, 'epoch': 3} {'type': 'loss', 'content': 0.06315721571445465, 'timestamp': '2025-10-01 04:39:30.632356', 'step': 16264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:30.685057', 'step': 16264, 'epoch': 3} {'type': 'loss', 'content': 0.14689123630523682, 'timestamp': '2025-10-01 04:39:30.687127', 'step': 16265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:30.741021', 'step': 16265, 'epoch': 3} {'type': 'loss', 'content': 0.07816853374242783, 'timestamp': '2025-10-01 04:39:30.743292', 'step': 16266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:30.796900', 'step': 16266, 'epoch': 3} {'type': 'loss', 'content': 0.11805986613035202, 'timestamp': '2025-10-01 04:39:30.800077', 'step': 16267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:30.853538', 'step': 16267, 'epoch': 3} {'type': 'loss', 'content': 0.16563184559345245, 'timestamp': '2025-10-01 04:39:30.864421', 'step': 16268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:30.917724', 'step': 16268, 'epoch': 3} {'type': 'loss', 'content': 0.1416497528553009, 'timestamp': '2025-10-01 04:39:30.919955', 'step': 16269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:39:30.974650', 'step': 16269, 'epoch': 3} {'type': 'loss', 'content': 0.06879157572984695, 'timestamp': '2025-10-01 04:39:30.976866', 'step': 16270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:31.031417', 'step': 16270, 'epoch': 3} {'type': 'loss', 'content': 0.09914666414260864, 'timestamp': '2025-10-01 04:39:31.034568', 'step': 16271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:31.088837', 'step': 16271, 'epoch': 3} {'type': 'loss', 'content': 0.04810504615306854, 'timestamp': '2025-10-01 04:39:31.094866', 'step': 16272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:31.148851', 'step': 16272, 'epoch': 3} {'type': 'loss', 'content': 0.12028403580188751, 'timestamp': '2025-10-01 04:39:31.151452', 'step': 16273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:31.204896', 'step': 16273, 'epoch': 3} {'type': 'loss', 'content': 0.105132095515728, 'timestamp': '2025-10-01 04:39:31.207031', 'step': 16274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:31.260446', 'step': 16274, 'epoch': 3} {'type': 'loss', 'content': 0.16405825316905975, 'timestamp': '2025-10-01 04:39:31.262745', 'step': 16275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:31.316599', 'step': 16275, 'epoch': 3} {'type': 'loss', 'content': 0.07532977312803268, 'timestamp': '2025-10-01 04:39:31.322858', 'step': 16276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:39:31.376419', 'step': 16276, 'epoch': 3} {'type': 'loss', 'content': 0.12419282644987106, 'timestamp': '2025-10-01 04:39:31.378670', 'step': 16277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:31.432825', 'step': 16277, 'epoch': 3} {'type': 'loss', 'content': 0.13432832062244415, 'timestamp': '2025-10-01 04:39:31.435025', 'step': 16278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:39:31.489458', 'step': 16278, 'epoch': 3} {'type': 'loss', 'content': 0.16344523429870605, 'timestamp': '2025-10-01 04:39:31.491668', 'step': 16279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:31.554322', 'step': 16279, 'epoch': 3} {'type': 'loss', 'content': 0.12963321805000305, 'timestamp': '2025-10-01 04:39:31.560620', 'step': 16280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:31.613848', 'step': 16280, 'epoch': 3} {'type': 'loss', 'content': 0.09423341602087021, 'timestamp': '2025-10-01 04:39:31.616033', 'step': 16281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:31.670538', 'step': 16281, 'epoch': 3} {'type': 'loss', 'content': 0.05757266283035278, 'timestamp': '2025-10-01 04:39:31.672812', 'step': 16282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:39:31.728569', 'step': 16282, 'epoch': 3} {'type': 'loss', 'content': 0.10193513333797455, 'timestamp': '2025-10-01 04:39:31.732380', 'step': 16283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:31.786617', 'step': 16283, 'epoch': 3} {'type': 'loss', 'content': 0.11999102681875229, 'timestamp': '2025-10-01 04:39:31.796978', 'step': 16284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:31.852997', 'step': 16284, 'epoch': 3} {'type': 'loss', 'content': 0.06245996803045273, 'timestamp': '2025-10-01 04:39:31.855456', 'step': 16285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:39:31.909528', 'step': 16285, 'epoch': 3} {'type': 'loss', 'content': 0.07818761467933655, 'timestamp': '2025-10-01 04:39:31.912119', 'step': 16286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:31.966048', 'step': 16286, 'epoch': 3} {'type': 'loss', 'content': 0.09770223498344421, 'timestamp': '2025-10-01 04:39:31.968518', 'step': 16287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:32.022526', 'step': 16287, 'epoch': 3} {'type': 'loss', 'content': 0.06239190325140953, 'timestamp': '2025-10-01 04:39:32.028554', 'step': 16288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:32.081936', 'step': 16288, 'epoch': 3} {'type': 'loss', 'content': 0.07102635502815247, 'timestamp': '2025-10-01 04:39:32.086605', 'step': 16289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:32.140118', 'step': 16289, 'epoch': 3} {'type': 'loss', 'content': 0.10499610006809235, 'timestamp': '2025-10-01 04:39:32.142344', 'step': 16290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:32.203000', 'step': 16290, 'epoch': 3} {'type': 'loss', 'content': 0.11626023054122925, 'timestamp': '2025-10-01 04:39:32.206701', 'step': 16291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:32.259960', 'step': 16291, 'epoch': 3} {'type': 'loss', 'content': 0.05842558667063713, 'timestamp': '2025-10-01 04:39:32.265784', 'step': 16292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:32.318921', 'step': 16292, 'epoch': 3} {'type': 'loss', 'content': 0.024918492883443832, 'timestamp': '2025-10-01 04:39:32.321221', 'step': 16293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:32.378878', 'step': 16293, 'epoch': 3} {'type': 'loss', 'content': 0.15303701162338257, 'timestamp': '2025-10-01 04:39:32.382124', 'step': 16294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:39:32.435504', 'step': 16294, 'epoch': 3} {'type': 'loss', 'content': 0.053957898169755936, 'timestamp': '2025-10-01 04:39:32.437788', 'step': 16295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:32.491141', 'step': 16295, 'epoch': 3} {'type': 'loss', 'content': 0.1442234367132187, 'timestamp': '2025-10-01 04:39:32.497104', 'step': 16296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:39:32.552057', 'step': 16296, 'epoch': 3} {'type': 'loss', 'content': 0.08360961824655533, 'timestamp': '2025-10-01 04:39:32.554356', 'step': 16297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:32.608744', 'step': 16297, 'epoch': 3} {'type': 'loss', 'content': 0.18218320608139038, 'timestamp': '2025-10-01 04:39:32.610948', 'step': 16298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:32.665078', 'step': 16298, 'epoch': 3} {'type': 'loss', 'content': 0.09605420380830765, 'timestamp': '2025-10-01 04:39:32.667655', 'step': 16299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:32.728868', 'step': 16299, 'epoch': 3} {'type': 'loss', 'content': 0.05077565833926201, 'timestamp': '2025-10-01 04:39:32.734762', 'step': 16300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:39:32.788006', 'step': 16300, 'epoch': 3} {'type': 'loss', 'content': 0.09916115552186966, 'timestamp': '2025-10-01 04:39:32.790265', 'step': 16301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:32.846232', 'step': 16301, 'epoch': 3} {'type': 'loss', 'content': 0.0796070471405983, 'timestamp': '2025-10-01 04:39:32.848448', 'step': 16302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:32.901806', 'step': 16302, 'epoch': 3} {'type': 'loss', 'content': 0.11422254145145416, 'timestamp': '2025-10-01 04:39:32.904284', 'step': 16303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:32.960209', 'step': 16303, 'epoch': 3} {'type': 'loss', 'content': 0.05375115945935249, 'timestamp': '2025-10-01 04:39:32.968980', 'step': 16304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:33.022145', 'step': 16304, 'epoch': 3} {'type': 'loss', 'content': 0.1111079677939415, 'timestamp': '2025-10-01 04:39:33.024705', 'step': 16305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:39:33.078658', 'step': 16305, 'epoch': 3} {'type': 'loss', 'content': 0.10455816984176636, 'timestamp': '2025-10-01 04:39:33.080994', 'step': 16306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:33.134331', 'step': 16306, 'epoch': 3} {'type': 'loss', 'content': 0.06331761181354523, 'timestamp': '2025-10-01 04:39:33.136665', 'step': 16307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:33.199538', 'step': 16307, 'epoch': 3} {'type': 'loss', 'content': 0.07035159319639206, 'timestamp': '2025-10-01 04:39:33.208704', 'step': 16308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:33.261577', 'step': 16308, 'epoch': 3} {'type': 'loss', 'content': 0.1300646811723709, 'timestamp': '2025-10-01 04:39:33.264219', 'step': 16309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:33.318075', 'step': 16309, 'epoch': 3} {'type': 'loss', 'content': 0.09134930372238159, 'timestamp': '2025-10-01 04:39:33.320389', 'step': 16310, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-01 04:39:46.583687', 'step': 16310, 'epoch': 3} {'type': 'pplx', 'content': 11068.96579294291, 'timestamp': '2025-10-01 04:39:46.586501', 'step': 16310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:39:46.640340', 'step': 16310, 'epoch': 3} {'type': 'loss', 'content': 0.10128434002399445, 'timestamp': '2025-10-01 04:39:46.642779', 'step': 16311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:39:46.697078', 'step': 16311, 'epoch': 3} {'type': 'loss', 'content': 0.031545333564281464, 'timestamp': '2025-10-01 04:39:46.703218', 'step': 16312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:46.756256', 'step': 16312, 'epoch': 3} {'type': 'loss', 'content': 0.08376441895961761, 'timestamp': '2025-10-01 04:39:46.761737', 'step': 16313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:46.815494', 'step': 16313, 'epoch': 3} {'type': 'loss', 'content': 0.12015528231859207, 'timestamp': '2025-10-01 04:39:46.817898', 'step': 16314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:46.871375', 'step': 16314, 'epoch': 3} {'type': 'loss', 'content': 0.05530016869306564, 'timestamp': '2025-10-01 04:39:46.873540', 'step': 16315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:46.928376', 'step': 16315, 'epoch': 3} {'type': 'loss', 'content': 0.17819596827030182, 'timestamp': '2025-10-01 04:39:46.934651', 'step': 16316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:46.998479', 'step': 16316, 'epoch': 3} {'type': 'loss', 'content': 0.09654121100902557, 'timestamp': '2025-10-01 04:39:47.002079', 'step': 16317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:47.056304', 'step': 16317, 'epoch': 3} {'type': 'loss', 'content': 0.050707511603832245, 'timestamp': '2025-10-01 04:39:47.058534', 'step': 16318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:47.112425', 'step': 16318, 'epoch': 3} {'type': 'loss', 'content': 0.08469320088624954, 'timestamp': '2025-10-01 04:39:47.114835', 'step': 16319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:47.172402', 'step': 16319, 'epoch': 3} {'type': 'loss', 'content': 0.11377330869436264, 'timestamp': '2025-10-01 04:39:47.178965', 'step': 16320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:39:47.232783', 'step': 16320, 'epoch': 3} {'type': 'loss', 'content': 0.11518141627311707, 'timestamp': '2025-10-01 04:39:47.235392', 'step': 16321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:47.288861', 'step': 16321, 'epoch': 3} {'type': 'loss', 'content': 0.09267416596412659, 'timestamp': '2025-10-01 04:39:47.291128', 'step': 16322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:39:47.345711', 'step': 16322, 'epoch': 3} {'type': 'loss', 'content': 0.035980772227048874, 'timestamp': '2025-10-01 04:39:47.348124', 'step': 16323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:47.401902', 'step': 16323, 'epoch': 3} {'type': 'loss', 'content': 0.10334527492523193, 'timestamp': '2025-10-01 04:39:47.407758', 'step': 16324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:47.461280', 'step': 16324, 'epoch': 3} {'type': 'loss', 'content': 0.10635515302419662, 'timestamp': '2025-10-01 04:39:47.463503', 'step': 16325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:39:47.518058', 'step': 16325, 'epoch': 3} {'type': 'loss', 'content': 0.0667988657951355, 'timestamp': '2025-10-01 04:39:47.520282', 'step': 16326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:39:47.573616', 'step': 16326, 'epoch': 3} {'type': 'loss', 'content': 0.10651973634958267, 'timestamp': '2025-10-01 04:39:47.576099', 'step': 16327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:47.629726', 'step': 16327, 'epoch': 3} {'type': 'loss', 'content': 0.1495959311723709, 'timestamp': '2025-10-01 04:39:47.635649', 'step': 16328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:47.689776', 'step': 16328, 'epoch': 3} {'type': 'loss', 'content': 0.05236998572945595, 'timestamp': '2025-10-01 04:39:47.691915', 'step': 16329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:47.745294', 'step': 16329, 'epoch': 3} {'type': 'loss', 'content': 0.0615423358976841, 'timestamp': '2025-10-01 04:39:47.747586', 'step': 16330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:47.802297', 'step': 16330, 'epoch': 3} {'type': 'loss', 'content': 0.08906939625740051, 'timestamp': '2025-10-01 04:39:47.804889', 'step': 16331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:47.858693', 'step': 16331, 'epoch': 3} {'type': 'loss', 'content': 0.04180099815130234, 'timestamp': '2025-10-01 04:39:47.864482', 'step': 16332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:47.917766', 'step': 16332, 'epoch': 3} {'type': 'loss', 'content': 0.13770213723182678, 'timestamp': '2025-10-01 04:39:47.919880', 'step': 16333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:39:47.974015', 'step': 16333, 'epoch': 3} {'type': 'loss', 'content': 0.08806554973125458, 'timestamp': '2025-10-01 04:39:47.976483', 'step': 16334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:48.032656', 'step': 16334, 'epoch': 3} {'type': 'loss', 'content': 0.0851040706038475, 'timestamp': '2025-10-01 04:39:48.035282', 'step': 16335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:48.088766', 'step': 16335, 'epoch': 3} {'type': 'loss', 'content': 0.1388428956270218, 'timestamp': '2025-10-01 04:39:48.094506', 'step': 16336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:48.147833', 'step': 16336, 'epoch': 3} {'type': 'loss', 'content': 0.20118354260921478, 'timestamp': '2025-10-01 04:39:48.150125', 'step': 16337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:39:48.204860', 'step': 16337, 'epoch': 3} {'type': 'loss', 'content': 0.116605244576931, 'timestamp': '2025-10-01 04:39:48.207858', 'step': 16338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:48.261789', 'step': 16338, 'epoch': 3} {'type': 'loss', 'content': 0.11658015102148056, 'timestamp': '2025-10-01 04:39:48.269307', 'step': 16339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:39:48.322340', 'step': 16339, 'epoch': 3} {'type': 'loss', 'content': 0.15273238718509674, 'timestamp': '2025-10-01 04:39:48.328190', 'step': 16340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:48.381051', 'step': 16340, 'epoch': 3} {'type': 'loss', 'content': 0.05912095680832863, 'timestamp': '2025-10-01 04:39:48.383318', 'step': 16341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:39:48.437624', 'step': 16341, 'epoch': 3} {'type': 'loss', 'content': 0.1535015106201172, 'timestamp': '2025-10-01 04:39:48.439670', 'step': 16342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:48.493014', 'step': 16342, 'epoch': 3} {'type': 'loss', 'content': 0.08055001497268677, 'timestamp': '2025-10-01 04:39:48.497994', 'step': 16343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:48.551136', 'step': 16343, 'epoch': 3} {'type': 'loss', 'content': 0.1332041621208191, 'timestamp': '2025-10-01 04:39:48.557927', 'step': 16344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:48.611669', 'step': 16344, 'epoch': 3} {'type': 'loss', 'content': 0.08622130006551743, 'timestamp': '2025-10-01 04:39:48.614050', 'step': 16345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:48.667655', 'step': 16345, 'epoch': 3} {'type': 'loss', 'content': 0.14901092648506165, 'timestamp': '2025-10-01 04:39:48.670548', 'step': 16346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:39:48.724648', 'step': 16346, 'epoch': 3} {'type': 'loss', 'content': 0.05938634276390076, 'timestamp': '2025-10-01 04:39:48.726918', 'step': 16347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:48.781759', 'step': 16347, 'epoch': 3} {'type': 'loss', 'content': 0.08820542693138123, 'timestamp': '2025-10-01 04:39:48.787671', 'step': 16348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:48.841081', 'step': 16348, 'epoch': 3} {'type': 'loss', 'content': 0.033205509185791016, 'timestamp': '2025-10-01 04:39:48.843500', 'step': 16349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:48.896997', 'step': 16349, 'epoch': 3} {'type': 'loss', 'content': 0.11458122730255127, 'timestamp': '2025-10-01 04:39:48.900119', 'step': 16350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:48.954690', 'step': 16350, 'epoch': 3} {'type': 'loss', 'content': 0.11592715233564377, 'timestamp': '2025-10-01 04:39:48.957040', 'step': 16351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:39:49.011295', 'step': 16351, 'epoch': 3} {'type': 'loss', 'content': 0.14115393161773682, 'timestamp': '2025-10-01 04:39:49.017397', 'step': 16352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:49.071697', 'step': 16352, 'epoch': 3} {'type': 'loss', 'content': 0.06834413856267929, 'timestamp': '2025-10-01 04:39:49.077094', 'step': 16353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:39:49.132228', 'step': 16353, 'epoch': 3} {'type': 'loss', 'content': 0.13694927096366882, 'timestamp': '2025-10-01 04:39:49.134399', 'step': 16354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:49.188818', 'step': 16354, 'epoch': 3} {'type': 'loss', 'content': 0.050530388951301575, 'timestamp': '2025-10-01 04:39:49.190925', 'step': 16355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:49.244716', 'step': 16355, 'epoch': 3} {'type': 'loss', 'content': 0.043838970363140106, 'timestamp': '2025-10-01 04:39:49.250726', 'step': 16356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:49.305630', 'step': 16356, 'epoch': 3} {'type': 'loss', 'content': 0.08548861742019653, 'timestamp': '2025-10-01 04:39:49.307785', 'step': 16357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:49.364752', 'step': 16357, 'epoch': 3} {'type': 'loss', 'content': 0.07052411884069443, 'timestamp': '2025-10-01 04:39:49.367119', 'step': 16358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:39:49.421701', 'step': 16358, 'epoch': 3} {'type': 'loss', 'content': 0.05877947807312012, 'timestamp': '2025-10-01 04:39:49.424120', 'step': 16359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:49.477175', 'step': 16359, 'epoch': 3} {'type': 'loss', 'content': 0.09768930077552795, 'timestamp': '2025-10-01 04:39:49.483245', 'step': 16360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:49.538152', 'step': 16360, 'epoch': 3} {'type': 'loss', 'content': 0.06930529326200485, 'timestamp': '2025-10-01 04:39:49.541327', 'step': 16361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:49.595459', 'step': 16361, 'epoch': 3} {'type': 'loss', 'content': 0.1519315093755722, 'timestamp': '2025-10-01 04:39:49.597796', 'step': 16362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:49.657540', 'step': 16362, 'epoch': 3} {'type': 'loss', 'content': 0.09854600578546524, 'timestamp': '2025-10-01 04:39:49.662091', 'step': 16363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:49.715949', 'step': 16363, 'epoch': 3} {'type': 'loss', 'content': 0.15228702127933502, 'timestamp': '2025-10-01 04:39:49.726935', 'step': 16364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:49.780866', 'step': 16364, 'epoch': 3} {'type': 'loss', 'content': 0.2555743455886841, 'timestamp': '2025-10-01 04:39:49.783104', 'step': 16365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:49.836384', 'step': 16365, 'epoch': 3} {'type': 'loss', 'content': 0.04457087814807892, 'timestamp': '2025-10-01 04:39:49.839031', 'step': 16366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:49.894967', 'step': 16366, 'epoch': 3} {'type': 'loss', 'content': 0.10027165710926056, 'timestamp': '2025-10-01 04:39:49.897403', 'step': 16367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:49.953693', 'step': 16367, 'epoch': 3} {'type': 'loss', 'content': 0.1428375244140625, 'timestamp': '2025-10-01 04:39:49.959500', 'step': 16368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:50.013864', 'step': 16368, 'epoch': 3} {'type': 'loss', 'content': 0.07589501142501831, 'timestamp': '2025-10-01 04:39:50.016096', 'step': 16369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:50.069799', 'step': 16369, 'epoch': 3} {'type': 'loss', 'content': 0.11068964004516602, 'timestamp': '2025-10-01 04:39:50.071927', 'step': 16370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:50.133127', 'step': 16370, 'epoch': 3} {'type': 'loss', 'content': 0.1719982624053955, 'timestamp': '2025-10-01 04:39:50.135294', 'step': 16371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:50.189623', 'step': 16371, 'epoch': 3} {'type': 'loss', 'content': 0.07800575345754623, 'timestamp': '2025-10-01 04:39:50.195763', 'step': 16372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:50.251804', 'step': 16372, 'epoch': 3} {'type': 'loss', 'content': 0.10114645212888718, 'timestamp': '2025-10-01 04:39:50.257292', 'step': 16373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:50.311554', 'step': 16373, 'epoch': 3} {'type': 'loss', 'content': 0.04146585613489151, 'timestamp': '2025-10-01 04:39:50.313935', 'step': 16374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:50.369353', 'step': 16374, 'epoch': 3} {'type': 'loss', 'content': 0.14404982328414917, 'timestamp': '2025-10-01 04:39:50.371565', 'step': 16375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:50.429055', 'step': 16375, 'epoch': 3} {'type': 'loss', 'content': 0.03188785910606384, 'timestamp': '2025-10-01 04:39:50.434659', 'step': 16376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:50.488115', 'step': 16376, 'epoch': 3} {'type': 'loss', 'content': 0.041800402104854584, 'timestamp': '2025-10-01 04:39:50.490816', 'step': 16377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:50.545999', 'step': 16377, 'epoch': 3} {'type': 'loss', 'content': 0.04391324147582054, 'timestamp': '2025-10-01 04:39:50.548370', 'step': 16378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:50.604542', 'step': 16378, 'epoch': 3} {'type': 'loss', 'content': 0.09317370504140854, 'timestamp': '2025-10-01 04:39:50.606929', 'step': 16379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:50.664604', 'step': 16379, 'epoch': 3} {'type': 'loss', 'content': 0.10141471773386002, 'timestamp': '2025-10-01 04:39:50.671089', 'step': 16380, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:50.726793', 'step': 16380, 'epoch': 3} {'type': 'loss', 'content': 0.0943923369050026, 'timestamp': '2025-10-01 04:39:50.729610', 'step': 16381, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:50.785362', 'step': 16381, 'epoch': 3} {'type': 'loss', 'content': 0.09645500779151917, 'timestamp': '2025-10-01 04:39:50.788142', 'step': 16382, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:50.844041', 'step': 16382, 'epoch': 3} {'type': 'loss', 'content': 0.15941444039344788, 'timestamp': '2025-10-01 04:39:50.846419', 'step': 16383, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:50.903140', 'step': 16383, 'epoch': 3} {'type': 'loss', 'content': 0.1495104283094406, 'timestamp': '2025-10-01 04:39:50.910599', 'step': 16384, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:50.965648', 'step': 16384, 'epoch': 3} {'type': 'loss', 'content': 0.09762401133775711, 'timestamp': '2025-10-01 04:39:50.969300', 'step': 16385, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:51.026921', 'step': 16385, 'epoch': 3} {'type': 'loss', 'content': 0.07905560731887817, 'timestamp': '2025-10-01 04:39:51.033498', 'step': 16386, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:51.098020', 'step': 16386, 'epoch': 3} {'type': 'loss', 'content': 0.07015332579612732, 'timestamp': '2025-10-01 04:39:51.104395', 'step': 16387, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:51.159835', 'step': 16387, 'epoch': 3} {'type': 'loss', 'content': 0.21219047904014587, 'timestamp': '2025-10-01 04:39:51.165873', 'step': 16388, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:51.220423', 'step': 16388, 'epoch': 3} {'type': 'loss', 'content': 0.18271389603614807, 'timestamp': '2025-10-01 04:39:51.223111', 'step': 16389, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:51.277044', 'step': 16389, 'epoch': 3} {'type': 'loss', 'content': 0.12087482213973999, 'timestamp': '2025-10-01 04:39:51.279470', 'step': 16390, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:51.333449', 'step': 16390, 'epoch': 3} {'type': 'loss', 'content': 0.07250336557626724, 'timestamp': '2025-10-01 04:39:51.335880', 'step': 16391, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:51.389856', 'step': 16391, 'epoch': 3} {'type': 'loss', 'content': 0.07359125465154648, 'timestamp': '2025-10-01 04:39:51.396070', 'step': 16392, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:51.449800', 'step': 16392, 'epoch': 3} {'type': 'loss', 'content': 0.04599622264504433, 'timestamp': '2025-10-01 04:39:51.452174', 'step': 16393, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:51.507367', 'step': 16393, 'epoch': 3} {'type': 'loss', 'content': 0.09819028526544571, 'timestamp': '2025-10-01 04:39:51.509873', 'step': 16394, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:51.565508', 'step': 16394, 'epoch': 3} {'type': 'loss', 'content': 0.060928527265787125, 'timestamp': '2025-10-01 04:39:51.568875', 'step': 16395, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:51.624780', 'step': 16395, 'epoch': 3} {'type': 'loss', 'content': 0.07098355889320374, 'timestamp': '2025-10-01 04:39:51.630879', 'step': 16396, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:51.685946', 'step': 16396, 'epoch': 3} {'type': 'loss', 'content': 0.1741078794002533, 'timestamp': '2025-10-01 04:39:51.688461', 'step': 16397, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:51.743757', 'step': 16397, 'epoch': 3} {'type': 'loss', 'content': 0.031839001923799515, 'timestamp': '2025-10-01 04:39:51.747596', 'step': 16398, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:51.801988', 'step': 16398, 'epoch': 3} {'type': 'loss', 'content': 0.10647646337747574, 'timestamp': '2025-10-01 04:39:51.804476', 'step': 16399, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:51.859420', 'step': 16399, 'epoch': 3} {'type': 'loss', 'content': 0.11577317118644714, 'timestamp': '2025-10-01 04:39:51.865710', 'step': 16400, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:51.920031', 'step': 16400, 'epoch': 3} {'type': 'loss', 'content': 0.06762849539518356, 'timestamp': '2025-10-01 04:39:51.923086', 'step': 16401, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:51.978824', 'step': 16401, 'epoch': 3} {'type': 'loss', 'content': 0.08569522202014923, 'timestamp': '2025-10-01 04:39:51.981230', 'step': 16402, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:52.035925', 'step': 16402, 'epoch': 3} {'type': 'loss', 'content': 0.06546952575445175, 'timestamp': '2025-10-01 04:39:52.038077', 'step': 16403, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:52.109444', 'step': 16403, 'epoch': 3} {'type': 'loss', 'content': 0.05398966372013092, 'timestamp': '2025-10-01 04:39:52.115981', 'step': 16404, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:52.169757', 'step': 16404, 'epoch': 3} {'type': 'loss', 'content': 0.11371083557605743, 'timestamp': '2025-10-01 04:39:52.172103', 'step': 16405, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:52.229696', 'step': 16405, 'epoch': 3} {'type': 'loss', 'content': 0.07554227113723755, 'timestamp': '2025-10-01 04:39:52.232109', 'step': 16406, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:52.293276', 'step': 16406, 'epoch': 3} {'type': 'loss', 'content': 0.08067162334918976, 'timestamp': '2025-10-01 04:39:52.295494', 'step': 16407, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:52.355447', 'step': 16407, 'epoch': 3} {'type': 'loss', 'content': 0.12535642087459564, 'timestamp': '2025-10-01 04:39:52.363139', 'step': 16408, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:39:52.422326', 'step': 16408, 'epoch': 3} {'type': 'loss', 'content': 0.11845884472131729, 'timestamp': '2025-10-01 04:39:52.424772', 'step': 16409, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:52.485848', 'step': 16409, 'epoch': 3} {'type': 'loss', 'content': 0.1240682601928711, 'timestamp': '2025-10-01 04:39:52.488520', 'step': 16410, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:52.549983', 'step': 16410, 'epoch': 3} {'type': 'loss', 'content': 0.08759192377328873, 'timestamp': '2025-10-01 04:39:52.552257', 'step': 16411, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:52.611355', 'step': 16411, 'epoch': 3} {'type': 'loss', 'content': 0.05222848430275917, 'timestamp': '2025-10-01 04:39:52.618548', 'step': 16412, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:52.678410', 'step': 16412, 'epoch': 3} {'type': 'loss', 'content': 0.10407667607069016, 'timestamp': '2025-10-01 04:39:52.680778', 'step': 16413, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:52.741246', 'step': 16413, 'epoch': 3} {'type': 'loss', 'content': 0.06194750592112541, 'timestamp': '2025-10-01 04:39:52.743525', 'step': 16414, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:52.803359', 'step': 16414, 'epoch': 3} {'type': 'loss', 'content': 0.06374581903219223, 'timestamp': '2025-10-01 04:39:52.806046', 'step': 16415, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:52.867366', 'step': 16415, 'epoch': 3} {'type': 'loss', 'content': 0.2128159999847412, 'timestamp': '2025-10-01 04:39:52.874476', 'step': 16416, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:52.935443', 'step': 16416, 'epoch': 3} {'type': 'loss', 'content': 0.09359659254550934, 'timestamp': '2025-10-01 04:39:52.937897', 'step': 16417, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:39:52.998363', 'step': 16417, 'epoch': 3} {'type': 'loss', 'content': 0.07764165103435516, 'timestamp': '2025-10-01 04:39:53.001031', 'step': 16418, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:39:53.073080', 'step': 16418, 'epoch': 3} {'type': 'loss', 'content': 0.04001843184232712, 'timestamp': '2025-10-01 04:39:53.075348', 'step': 16419, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:53.139692', 'step': 16419, 'epoch': 3} {'type': 'loss', 'content': 0.10002675652503967, 'timestamp': '2025-10-01 04:39:53.146951', 'step': 16420, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:53.206067', 'step': 16420, 'epoch': 3} {'type': 'loss', 'content': 0.1421082615852356, 'timestamp': '2025-10-01 04:39:53.208373', 'step': 16421, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:53.268819', 'step': 16421, 'epoch': 3} {'type': 'loss', 'content': 0.1388222873210907, 'timestamp': '2025-10-01 04:39:53.271323', 'step': 16422, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:53.330492', 'step': 16422, 'epoch': 3} {'type': 'loss', 'content': 0.13482427597045898, 'timestamp': '2025-10-01 04:39:53.333171', 'step': 16423, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:53.393371', 'step': 16423, 'epoch': 3} {'type': 'loss', 'content': 0.08183657377958298, 'timestamp': '2025-10-01 04:39:53.400771', 'step': 16424, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:53.464783', 'step': 16424, 'epoch': 3} {'type': 'loss', 'content': 0.1163238137960434, 'timestamp': '2025-10-01 04:39:53.467241', 'step': 16425, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:53.525709', 'step': 16425, 'epoch': 3} {'type': 'loss', 'content': 0.1269516497850418, 'timestamp': '2025-10-01 04:39:53.528143', 'step': 16426, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:53.590262', 'step': 16426, 'epoch': 3} {'type': 'loss', 'content': 0.0393974594771862, 'timestamp': '2025-10-01 04:39:53.592559', 'step': 16427, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:53.651859', 'step': 16427, 'epoch': 3} {'type': 'loss', 'content': 0.07480048388242722, 'timestamp': '2025-10-01 04:39:53.659977', 'step': 16428, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:53.729261', 'step': 16428, 'epoch': 3} {'type': 'loss', 'content': 0.08660165220499039, 'timestamp': '2025-10-01 04:39:53.734432', 'step': 16429, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:53.791319', 'step': 16429, 'epoch': 3} {'type': 'loss', 'content': 0.08365748822689056, 'timestamp': '2025-10-01 04:39:53.793457', 'step': 16430, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:39:53.848905', 'step': 16430, 'epoch': 3} {'type': 'loss', 'content': 0.03126569837331772, 'timestamp': '2025-10-01 04:39:53.851005', 'step': 16431, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:53.904531', 'step': 16431, 'epoch': 3} {'type': 'loss', 'content': 0.10833021253347397, 'timestamp': '2025-10-01 04:39:53.910713', 'step': 16432, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:53.963401', 'step': 16432, 'epoch': 3} {'type': 'loss', 'content': 0.10602694749832153, 'timestamp': '2025-10-01 04:39:53.965666', 'step': 16433, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:54.018329', 'step': 16433, 'epoch': 3} {'type': 'loss', 'content': 0.20297421514987946, 'timestamp': '2025-10-01 04:39:54.020640', 'step': 16434, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:39:54.073926', 'step': 16434, 'epoch': 3} {'type': 'loss', 'content': 0.07885942608118057, 'timestamp': '2025-10-01 04:39:54.076317', 'step': 16435, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:54.137116', 'step': 16435, 'epoch': 3} {'type': 'loss', 'content': 0.0867651179432869, 'timestamp': '2025-10-01 04:39:54.143075', 'step': 16436, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:54.197267', 'step': 16436, 'epoch': 3} {'type': 'loss', 'content': 0.07518482953310013, 'timestamp': '2025-10-01 04:39:54.199508', 'step': 16437, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:54.252852', 'step': 16437, 'epoch': 3} {'type': 'loss', 'content': 0.08447831124067307, 'timestamp': '2025-10-01 04:39:54.255218', 'step': 16438, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:39:54.309442', 'step': 16438, 'epoch': 3} {'type': 'loss', 'content': 0.18349474668502808, 'timestamp': '2025-10-01 04:39:54.315966', 'step': 16439, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:54.369191', 'step': 16439, 'epoch': 3} {'type': 'loss', 'content': 0.07422240078449249, 'timestamp': '2025-10-01 04:39:54.375090', 'step': 16440, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:54.428964', 'step': 16440, 'epoch': 3} {'type': 'loss', 'content': 0.01743854209780693, 'timestamp': '2025-10-01 04:39:54.431101', 'step': 16441, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:54.484412', 'step': 16441, 'epoch': 3} {'type': 'loss', 'content': 0.06593070924282074, 'timestamp': '2025-10-01 04:39:54.486519', 'step': 16442, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:39:54.540902', 'step': 16442, 'epoch': 3} {'type': 'loss', 'content': 0.06522151827812195, 'timestamp': '2025-10-01 04:39:54.543026', 'step': 16443, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:39:54.596401', 'step': 16443, 'epoch': 3} {'type': 'loss', 'content': 0.09724225103855133, 'timestamp': '2025-10-01 04:39:54.602485', 'step': 16444, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:54.655583', 'step': 16444, 'epoch': 3} {'type': 'loss', 'content': 0.13824643194675446, 'timestamp': '2025-10-01 04:39:54.657895', 'step': 16445, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:54.711006', 'step': 16445, 'epoch': 3} {'type': 'loss', 'content': 0.12184810638427734, 'timestamp': '2025-10-01 04:39:54.713686', 'step': 16446, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:54.768601', 'step': 16446, 'epoch': 3} {'type': 'loss', 'content': 0.11702505499124527, 'timestamp': '2025-10-01 04:39:54.770817', 'step': 16447, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:39:54.825400', 'step': 16447, 'epoch': 3} {'type': 'loss', 'content': 0.08729840070009232, 'timestamp': '2025-10-01 04:39:54.831534', 'step': 16448, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:54.885142', 'step': 16448, 'epoch': 3} {'type': 'loss', 'content': 0.08037164062261581, 'timestamp': '2025-10-01 04:39:54.887343', 'step': 16449, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:54.942207', 'step': 16449, 'epoch': 3} {'type': 'loss', 'content': 0.06305845826864243, 'timestamp': '2025-10-01 04:39:54.949766', 'step': 16450, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:55.003840', 'step': 16450, 'epoch': 3} {'type': 'loss', 'content': 0.07770371437072754, 'timestamp': '2025-10-01 04:39:55.006183', 'step': 16451, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:55.060531', 'step': 16451, 'epoch': 3} {'type': 'loss', 'content': 0.07447020709514618, 'timestamp': '2025-10-01 04:39:55.066535', 'step': 16452, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:55.119353', 'step': 16452, 'epoch': 3} {'type': 'loss', 'content': 0.08800409734249115, 'timestamp': '2025-10-01 04:39:55.121547', 'step': 16453, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:55.174389', 'step': 16453, 'epoch': 3} {'type': 'loss', 'content': 0.07428943365812302, 'timestamp': '2025-10-01 04:39:55.176455', 'step': 16454, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:55.229792', 'step': 16454, 'epoch': 3} {'type': 'loss', 'content': 0.13520272076129913, 'timestamp': '2025-10-01 04:39:55.232017', 'step': 16455, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:39:55.284946', 'step': 16455, 'epoch': 3} {'type': 'loss', 'content': 0.047867853194475174, 'timestamp': '2025-10-01 04:39:55.292901', 'step': 16456, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:55.346239', 'step': 16456, 'epoch': 3} {'type': 'loss', 'content': 0.05655007064342499, 'timestamp': '2025-10-01 04:39:55.348360', 'step': 16457, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:55.401826', 'step': 16457, 'epoch': 3} {'type': 'loss', 'content': 0.20710034668445587, 'timestamp': '2025-10-01 04:39:55.404590', 'step': 16458, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:55.460627', 'step': 16458, 'epoch': 3} {'type': 'loss', 'content': 0.07089073956012726, 'timestamp': '2025-10-01 04:39:55.462950', 'step': 16459, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:55.515769', 'step': 16459, 'epoch': 3} {'type': 'loss', 'content': 0.08892291784286499, 'timestamp': '2025-10-01 04:39:55.521652', 'step': 16460, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:55.574709', 'step': 16460, 'epoch': 3} {'type': 'loss', 'content': 0.14966705441474915, 'timestamp': '2025-10-01 04:39:55.576817', 'step': 16461, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:55.629311', 'step': 16461, 'epoch': 3} {'type': 'loss', 'content': 0.08368446677923203, 'timestamp': '2025-10-01 04:39:55.631394', 'step': 16462, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:55.684753', 'step': 16462, 'epoch': 3} {'type': 'loss', 'content': 0.052375517785549164, 'timestamp': '2025-10-01 04:39:55.686890', 'step': 16463, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:55.739875', 'step': 16463, 'epoch': 3} {'type': 'loss', 'content': 0.05515117943286896, 'timestamp': '2025-10-01 04:39:55.745554', 'step': 16464, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:55.798527', 'step': 16464, 'epoch': 3} {'type': 'loss', 'content': 0.17073571681976318, 'timestamp': '2025-10-01 04:39:55.800641', 'step': 16465, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:55.854136', 'step': 16465, 'epoch': 3} {'type': 'loss', 'content': 0.03503561019897461, 'timestamp': '2025-10-01 04:39:55.856351', 'step': 16466, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:55.909836', 'step': 16466, 'epoch': 3} {'type': 'loss', 'content': 0.060765884816646576, 'timestamp': '2025-10-01 04:39:55.912109', 'step': 16467, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:55.964992', 'step': 16467, 'epoch': 3} {'type': 'loss', 'content': 0.07214323431253433, 'timestamp': '2025-10-01 04:39:55.970780', 'step': 16468, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:56.023671', 'step': 16468, 'epoch': 3} {'type': 'loss', 'content': 0.10662852972745895, 'timestamp': '2025-10-01 04:39:56.026604', 'step': 16469, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:56.079837', 'step': 16469, 'epoch': 3} {'type': 'loss', 'content': 0.07823130488395691, 'timestamp': '2025-10-01 04:39:56.082515', 'step': 16470, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:39:56.137047', 'step': 16470, 'epoch': 3} {'type': 'loss', 'content': 0.0760287418961525, 'timestamp': '2025-10-01 04:39:56.139187', 'step': 16471, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:56.192478', 'step': 16471, 'epoch': 3} {'type': 'loss', 'content': 0.012567508965730667, 'timestamp': '2025-10-01 04:39:56.198340', 'step': 16472, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:56.250667', 'step': 16472, 'epoch': 3} {'type': 'loss', 'content': 0.16896037757396698, 'timestamp': '2025-10-01 04:39:56.252718', 'step': 16473, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:56.306617', 'step': 16473, 'epoch': 3} {'type': 'loss', 'content': 0.11214949190616608, 'timestamp': '2025-10-01 04:39:56.308786', 'step': 16474, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:56.363913', 'step': 16474, 'epoch': 3} {'type': 'loss', 'content': 0.1384805142879486, 'timestamp': '2025-10-01 04:39:56.365787', 'step': 16475, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:56.419595', 'step': 16475, 'epoch': 3} {'type': 'loss', 'content': 0.14949636161327362, 'timestamp': '2025-10-01 04:39:56.425679', 'step': 16476, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:56.478897', 'step': 16476, 'epoch': 3} {'type': 'loss', 'content': 0.07717316597700119, 'timestamp': '2025-10-01 04:39:56.480973', 'step': 16477, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:56.534241', 'step': 16477, 'epoch': 3} {'type': 'loss', 'content': 0.039997879415750504, 'timestamp': '2025-10-01 04:39:56.536382', 'step': 16478, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:39:56.590391', 'step': 16478, 'epoch': 3} {'type': 'loss', 'content': 0.08523377031087875, 'timestamp': '2025-10-01 04:39:56.592490', 'step': 16479, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:56.646350', 'step': 16479, 'epoch': 3} {'type': 'loss', 'content': 0.1240101009607315, 'timestamp': '2025-10-01 04:39:56.652439', 'step': 16480, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:56.706503', 'step': 16480, 'epoch': 3} {'type': 'loss', 'content': 0.06653793156147003, 'timestamp': '2025-10-01 04:39:56.708850', 'step': 16481, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:56.762114', 'step': 16481, 'epoch': 3} {'type': 'loss', 'content': 0.025661785155534744, 'timestamp': '2025-10-01 04:39:56.765145', 'step': 16482, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:39:56.819879', 'step': 16482, 'epoch': 3} {'type': 'loss', 'content': 0.08093926310539246, 'timestamp': '2025-10-01 04:39:56.821695', 'step': 16483, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:56.874968', 'step': 16483, 'epoch': 3} {'type': 'loss', 'content': 0.06947065889835358, 'timestamp': '2025-10-01 04:39:56.881382', 'step': 16484, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:56.934056', 'step': 16484, 'epoch': 3} {'type': 'loss', 'content': 0.08659163117408752, 'timestamp': '2025-10-01 04:39:56.936239', 'step': 16485, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:56.989429', 'step': 16485, 'epoch': 3} {'type': 'loss', 'content': 0.09934761375188828, 'timestamp': '2025-10-01 04:39:56.991595', 'step': 16486, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:57.046197', 'step': 16486, 'epoch': 3} {'type': 'loss', 'content': 0.047908905893564224, 'timestamp': '2025-10-01 04:39:57.048533', 'step': 16487, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:57.102163', 'step': 16487, 'epoch': 3} {'type': 'loss', 'content': 0.0871509239077568, 'timestamp': '2025-10-01 04:39:57.108168', 'step': 16488, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:57.162519', 'step': 16488, 'epoch': 3} {'type': 'loss', 'content': 0.11154968291521072, 'timestamp': '2025-10-01 04:39:57.164679', 'step': 16489, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:57.218641', 'step': 16489, 'epoch': 3} {'type': 'loss', 'content': 0.12346872687339783, 'timestamp': '2025-10-01 04:39:57.220872', 'step': 16490, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:57.274738', 'step': 16490, 'epoch': 3} {'type': 'loss', 'content': 0.07223797589540482, 'timestamp': '2025-10-01 04:39:57.276903', 'step': 16491, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:57.330443', 'step': 16491, 'epoch': 3} {'type': 'loss', 'content': 0.06646431982517242, 'timestamp': '2025-10-01 04:39:57.339019', 'step': 16492, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:39:57.391736', 'step': 16492, 'epoch': 3} {'type': 'loss', 'content': 0.06515834480524063, 'timestamp': '2025-10-01 04:39:57.393837', 'step': 16493, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:39:57.448470', 'step': 16493, 'epoch': 3} {'type': 'loss', 'content': 0.0805557519197464, 'timestamp': '2025-10-01 04:39:57.450933', 'step': 16494, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:57.504922', 'step': 16494, 'epoch': 3} {'type': 'loss', 'content': 0.10198774188756943, 'timestamp': '2025-10-01 04:39:57.510019', 'step': 16495, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:57.564408', 'step': 16495, 'epoch': 3} {'type': 'loss', 'content': 0.13211138546466827, 'timestamp': '2025-10-01 04:39:57.570259', 'step': 16496, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:57.622983', 'step': 16496, 'epoch': 3} {'type': 'loss', 'content': 0.08994950354099274, 'timestamp': '2025-10-01 04:39:57.625435', 'step': 16497, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:57.678442', 'step': 16497, 'epoch': 3} {'type': 'loss', 'content': 0.11547285318374634, 'timestamp': '2025-10-01 04:39:57.680484', 'step': 16498, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:57.734200', 'step': 16498, 'epoch': 3} {'type': 'loss', 'content': 0.08827590197324753, 'timestamp': '2025-10-01 04:39:57.736266', 'step': 16499, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:57.792473', 'step': 16499, 'epoch': 3} {'type': 'loss', 'content': 0.0955669954419136, 'timestamp': '2025-10-01 04:39:57.799984', 'step': 16500, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 16500', 'timestamp': '2025-10-01 04:39:58.167635', 'step': 16500, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:39:58.222176', 'step': 16500, 'epoch': 3} {'type': 'loss', 'content': 0.10125840455293655, 'timestamp': '2025-10-01 04:39:58.224745', 'step': 16501, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:58.281716', 'step': 16501, 'epoch': 3} {'type': 'loss', 'content': 0.14554694294929504, 'timestamp': '2025-10-01 04:39:58.283830', 'step': 16502, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:58.337203', 'step': 16502, 'epoch': 3} {'type': 'loss', 'content': 0.12837561964988708, 'timestamp': '2025-10-01 04:39:58.342951', 'step': 16503, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:58.399477', 'step': 16503, 'epoch': 3} {'type': 'loss', 'content': 0.06439979374408722, 'timestamp': '2025-10-01 04:39:58.406159', 'step': 16504, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:58.462201', 'step': 16504, 'epoch': 3} {'type': 'loss', 'content': 0.10823813825845718, 'timestamp': '2025-10-01 04:39:58.464236', 'step': 16505, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:58.518203', 'step': 16505, 'epoch': 3} {'type': 'loss', 'content': 0.09412138164043427, 'timestamp': '2025-10-01 04:39:58.520321', 'step': 16506, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:58.574005', 'step': 16506, 'epoch': 3} {'type': 'loss', 'content': 0.12651361525058746, 'timestamp': '2025-10-01 04:39:58.576585', 'step': 16507, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:58.630278', 'step': 16507, 'epoch': 3} {'type': 'loss', 'content': 0.07327153533697128, 'timestamp': '2025-10-01 04:39:58.637947', 'step': 16508, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:58.690919', 'step': 16508, 'epoch': 3} {'type': 'loss', 'content': 0.07587604969739914, 'timestamp': '2025-10-01 04:39:58.693217', 'step': 16509, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:58.749460', 'step': 16509, 'epoch': 3} {'type': 'loss', 'content': 0.13321393728256226, 'timestamp': '2025-10-01 04:39:58.753129', 'step': 16510, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:58.806289', 'step': 16510, 'epoch': 3} {'type': 'loss', 'content': 0.08600456267595291, 'timestamp': '2025-10-01 04:39:58.809826', 'step': 16511, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:58.862554', 'step': 16511, 'epoch': 3} {'type': 'loss', 'content': 0.14851704239845276, 'timestamp': '2025-10-01 04:39:58.868389', 'step': 16512, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:58.921732', 'step': 16512, 'epoch': 3} {'type': 'loss', 'content': 0.10262671858072281, 'timestamp': '2025-10-01 04:39:58.923521', 'step': 16513, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:58.976644', 'step': 16513, 'epoch': 3} {'type': 'loss', 'content': 0.15039175748825073, 'timestamp': '2025-10-01 04:39:58.979055', 'step': 16514, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:59.035251', 'step': 16514, 'epoch': 3} {'type': 'loss', 'content': 0.0872613787651062, 'timestamp': '2025-10-01 04:39:59.037562', 'step': 16515, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:59.091585', 'step': 16515, 'epoch': 3} {'type': 'loss', 'content': 0.0997903048992157, 'timestamp': '2025-10-01 04:39:59.097368', 'step': 16516, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:59.152339', 'step': 16516, 'epoch': 3} {'type': 'loss', 'content': 0.12082259356975555, 'timestamp': '2025-10-01 04:39:59.154425', 'step': 16517, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:59.208393', 'step': 16517, 'epoch': 3} {'type': 'loss', 'content': 0.08274181932210922, 'timestamp': '2025-10-01 04:39:59.210457', 'step': 16518, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:59.263893', 'step': 16518, 'epoch': 3} {'type': 'loss', 'content': 0.09504077583551407, 'timestamp': '2025-10-01 04:39:59.267213', 'step': 16519, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:59.321526', 'step': 16519, 'epoch': 3} {'type': 'loss', 'content': 0.11887285858392715, 'timestamp': '2025-10-01 04:39:59.329886', 'step': 16520, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:59.383793', 'step': 16520, 'epoch': 3} {'type': 'loss', 'content': 0.11128416657447815, 'timestamp': '2025-10-01 04:39:59.385896', 'step': 16521, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:59.440385', 'step': 16521, 'epoch': 3} {'type': 'loss', 'content': 0.12870323657989502, 'timestamp': '2025-10-01 04:39:59.442614', 'step': 16522, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:39:59.497008', 'step': 16522, 'epoch': 3} {'type': 'loss', 'content': 0.10609614849090576, 'timestamp': '2025-10-01 04:39:59.499191', 'step': 16523, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:59.555663', 'step': 16523, 'epoch': 3} {'type': 'loss', 'content': 0.13523781299591064, 'timestamp': '2025-10-01 04:39:59.562080', 'step': 16524, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:39:59.619829', 'step': 16524, 'epoch': 3} {'type': 'loss', 'content': 0.02774324081838131, 'timestamp': '2025-10-01 04:39:59.623782', 'step': 16525, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:59.694391', 'step': 16525, 'epoch': 3} {'type': 'loss', 'content': 0.09419382363557816, 'timestamp': '2025-10-01 04:39:59.706294', 'step': 16526, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:39:59.760623', 'step': 16526, 'epoch': 3} {'type': 'loss', 'content': 0.031425777822732925, 'timestamp': '2025-10-01 04:39:59.763228', 'step': 16527, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:59.820271', 'step': 16527, 'epoch': 3} {'type': 'loss', 'content': 0.11816681176424026, 'timestamp': '2025-10-01 04:39:59.826150', 'step': 16528, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:39:59.880100', 'step': 16528, 'epoch': 3} {'type': 'loss', 'content': 0.08858980238437653, 'timestamp': '2025-10-01 04:39:59.882233', 'step': 16529, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:39:59.941832', 'step': 16529, 'epoch': 3} {'type': 'loss', 'content': 0.07583195716142654, 'timestamp': '2025-10-01 04:39:59.944102', 'step': 16530, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:00.004162', 'step': 16530, 'epoch': 3} {'type': 'loss', 'content': 0.131796196103096, 'timestamp': '2025-10-01 04:40:00.006667', 'step': 16531, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:00.061116', 'step': 16531, 'epoch': 3} {'type': 'loss', 'content': 0.07996977865695953, 'timestamp': '2025-10-01 04:40:00.068522', 'step': 16532, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:00.123885', 'step': 16532, 'epoch': 3} {'type': 'loss', 'content': 0.09540888667106628, 'timestamp': '2025-10-01 04:40:00.132745', 'step': 16533, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:00.194765', 'step': 16533, 'epoch': 3} {'type': 'loss', 'content': 0.07224223762750626, 'timestamp': '2025-10-01 04:40:00.197029', 'step': 16534, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:00.256325', 'step': 16534, 'epoch': 3} {'type': 'loss', 'content': 0.20396699011325836, 'timestamp': '2025-10-01 04:40:00.258724', 'step': 16535, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:00.313586', 'step': 16535, 'epoch': 3} {'type': 'loss', 'content': 0.09262803196907043, 'timestamp': '2025-10-01 04:40:00.319505', 'step': 16536, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:00.375825', 'step': 16536, 'epoch': 3} {'type': 'loss', 'content': 0.09230905771255493, 'timestamp': '2025-10-01 04:40:00.378354', 'step': 16537, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:00.435927', 'step': 16537, 'epoch': 3} {'type': 'loss', 'content': 0.09631182998418808, 'timestamp': '2025-10-01 04:40:00.439103', 'step': 16538, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:00.497935', 'step': 16538, 'epoch': 3} {'type': 'loss', 'content': 0.07597655802965164, 'timestamp': '2025-10-01 04:40:00.501181', 'step': 16539, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:00.558040', 'step': 16539, 'epoch': 3} {'type': 'loss', 'content': 0.0724177286028862, 'timestamp': '2025-10-01 04:40:00.565067', 'step': 16540, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:00.619484', 'step': 16540, 'epoch': 3} {'type': 'loss', 'content': 0.11912453919649124, 'timestamp': '2025-10-01 04:40:00.621763', 'step': 16541, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:00.677521', 'step': 16541, 'epoch': 3} {'type': 'loss', 'content': 0.09038875997066498, 'timestamp': '2025-10-01 04:40:00.681934', 'step': 16542, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:40:00.737335', 'step': 16542, 'epoch': 3} {'type': 'loss', 'content': 0.12621645629405975, 'timestamp': '2025-10-01 04:40:00.739973', 'step': 16543, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:00.794466', 'step': 16543, 'epoch': 3} {'type': 'loss', 'content': 0.12249390780925751, 'timestamp': '2025-10-01 04:40:00.800647', 'step': 16544, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:00.856331', 'step': 16544, 'epoch': 3} {'type': 'loss', 'content': 0.06560281664133072, 'timestamp': '2025-10-01 04:40:00.859213', 'step': 16545, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:00.913581', 'step': 16545, 'epoch': 3} {'type': 'loss', 'content': 0.07396427541971207, 'timestamp': '2025-10-01 04:40:00.915921', 'step': 16546, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:00.991768', 'step': 16546, 'epoch': 3} {'type': 'loss', 'content': 0.09272164106369019, 'timestamp': '2025-10-01 04:40:00.994461', 'step': 16547, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:01.048292', 'step': 16547, 'epoch': 3} {'type': 'loss', 'content': 0.06539875268936157, 'timestamp': '2025-10-01 04:40:01.054280', 'step': 16548, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:01.108011', 'step': 16548, 'epoch': 3} {'type': 'loss', 'content': 0.08817783743143082, 'timestamp': '2025-10-01 04:40:01.110471', 'step': 16549, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:40:01.165480', 'step': 16549, 'epoch': 3} {'type': 'loss', 'content': 0.1407088190317154, 'timestamp': '2025-10-01 04:40:01.168343', 'step': 16550, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:01.223003', 'step': 16550, 'epoch': 3} {'type': 'loss', 'content': 0.0463310182094574, 'timestamp': '2025-10-01 04:40:01.225477', 'step': 16551, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:01.279765', 'step': 16551, 'epoch': 3} {'type': 'loss', 'content': 0.09848937392234802, 'timestamp': '2025-10-01 04:40:01.286022', 'step': 16552, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:01.343528', 'step': 16552, 'epoch': 3} {'type': 'loss', 'content': 0.08710664510726929, 'timestamp': '2025-10-01 04:40:01.346053', 'step': 16553, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:01.400381', 'step': 16553, 'epoch': 3} {'type': 'loss', 'content': 0.15187479555606842, 'timestamp': '2025-10-01 04:40:01.403660', 'step': 16554, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:01.458451', 'step': 16554, 'epoch': 3} {'type': 'loss', 'content': 0.0805845707654953, 'timestamp': '2025-10-01 04:40:01.460554', 'step': 16555, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:01.518957', 'step': 16555, 'epoch': 3} {'type': 'loss', 'content': 0.10578204691410065, 'timestamp': '2025-10-01 04:40:01.524702', 'step': 16556, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:01.578358', 'step': 16556, 'epoch': 3} {'type': 'loss', 'content': 0.15796047449111938, 'timestamp': '2025-10-01 04:40:01.580487', 'step': 16557, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:01.634010', 'step': 16557, 'epoch': 3} {'type': 'loss', 'content': 0.16031762957572937, 'timestamp': '2025-10-01 04:40:01.636720', 'step': 16558, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:01.691913', 'step': 16558, 'epoch': 3} {'type': 'loss', 'content': 0.1600407510995865, 'timestamp': '2025-10-01 04:40:01.694009', 'step': 16559, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:01.747733', 'step': 16559, 'epoch': 3} {'type': 'loss', 'content': 0.14851877093315125, 'timestamp': '2025-10-01 04:40:01.753649', 'step': 16560, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:01.807160', 'step': 16560, 'epoch': 3} {'type': 'loss', 'content': 0.11858948320150375, 'timestamp': '2025-10-01 04:40:01.810796', 'step': 16561, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:01.869875', 'step': 16561, 'epoch': 3} {'type': 'loss', 'content': 0.11988664418458939, 'timestamp': '2025-10-01 04:40:01.872533', 'step': 16562, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:01.927752', 'step': 16562, 'epoch': 3} {'type': 'loss', 'content': 0.12540015578269958, 'timestamp': '2025-10-01 04:40:01.929856', 'step': 16563, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:01.983541', 'step': 16563, 'epoch': 3} {'type': 'loss', 'content': 0.14968891441822052, 'timestamp': '2025-10-01 04:40:01.989502', 'step': 16564, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:02.042936', 'step': 16564, 'epoch': 3} {'type': 'loss', 'content': 0.053382374346256256, 'timestamp': '2025-10-01 04:40:02.045405', 'step': 16565, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:02.101957', 'step': 16565, 'epoch': 3} {'type': 'loss', 'content': 0.043489955365657806, 'timestamp': '2025-10-01 04:40:02.104148', 'step': 16566, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:02.158927', 'step': 16566, 'epoch': 3} {'type': 'loss', 'content': 0.04766303300857544, 'timestamp': '2025-10-01 04:40:02.161333', 'step': 16567, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:40:02.215403', 'step': 16567, 'epoch': 3} {'type': 'loss', 'content': 0.07901526242494583, 'timestamp': '2025-10-01 04:40:02.221065', 'step': 16568, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:02.280447', 'step': 16568, 'epoch': 3} {'type': 'loss', 'content': 0.09259676188230515, 'timestamp': '2025-10-01 04:40:02.282646', 'step': 16569, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:02.336045', 'step': 16569, 'epoch': 3} {'type': 'loss', 'content': 0.11979436874389648, 'timestamp': '2025-10-01 04:40:02.340252', 'step': 16570, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:02.393379', 'step': 16570, 'epoch': 3} {'type': 'loss', 'content': 0.07421030104160309, 'timestamp': '2025-10-01 04:40:02.395635', 'step': 16571, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:02.448878', 'step': 16571, 'epoch': 3} {'type': 'loss', 'content': 0.05408027023077011, 'timestamp': '2025-10-01 04:40:02.454816', 'step': 16572, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:02.507563', 'step': 16572, 'epoch': 3} {'type': 'loss', 'content': 0.13995148241519928, 'timestamp': '2025-10-01 04:40:02.509626', 'step': 16573, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:02.563065', 'step': 16573, 'epoch': 3} {'type': 'loss', 'content': 0.05594702064990997, 'timestamp': '2025-10-01 04:40:02.565409', 'step': 16574, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:02.619244', 'step': 16574, 'epoch': 3} {'type': 'loss', 'content': 0.16045048832893372, 'timestamp': '2025-10-01 04:40:02.621435', 'step': 16575, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:02.674263', 'step': 16575, 'epoch': 3} {'type': 'loss', 'content': 0.12779618799686432, 'timestamp': '2025-10-01 04:40:02.682215', 'step': 16576, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:02.735985', 'step': 16576, 'epoch': 3} {'type': 'loss', 'content': 0.06261926889419556, 'timestamp': '2025-10-01 04:40:02.737968', 'step': 16577, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:02.790729', 'step': 16577, 'epoch': 3} {'type': 'loss', 'content': 0.07782524079084396, 'timestamp': '2025-10-01 04:40:02.793244', 'step': 16578, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:02.846312', 'step': 16578, 'epoch': 3} {'type': 'loss', 'content': 0.10795775800943375, 'timestamp': '2025-10-01 04:40:02.848521', 'step': 16579, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:02.902456', 'step': 16579, 'epoch': 3} {'type': 'loss', 'content': 0.10283415019512177, 'timestamp': '2025-10-01 04:40:02.908388', 'step': 16580, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:40:02.962427', 'step': 16580, 'epoch': 3} {'type': 'loss', 'content': 0.04464522376656532, 'timestamp': '2025-10-01 04:40:02.965123', 'step': 16581, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:03.026858', 'step': 16581, 'epoch': 3} {'type': 'loss', 'content': 0.11469640582799911, 'timestamp': '2025-10-01 04:40:03.029078', 'step': 16582, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:03.086239', 'step': 16582, 'epoch': 3} {'type': 'loss', 'content': 0.07691219449043274, 'timestamp': '2025-10-01 04:40:03.088630', 'step': 16583, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:40:03.142431', 'step': 16583, 'epoch': 3} {'type': 'loss', 'content': 0.10122841596603394, 'timestamp': '2025-10-01 04:40:03.148290', 'step': 16584, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:03.201145', 'step': 16584, 'epoch': 3} {'type': 'loss', 'content': 0.0880206897854805, 'timestamp': '2025-10-01 04:40:03.203350', 'step': 16585, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:03.256806', 'step': 16585, 'epoch': 3} {'type': 'loss', 'content': 0.06846088916063309, 'timestamp': '2025-10-01 04:40:03.258946', 'step': 16586, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:03.313312', 'step': 16586, 'epoch': 3} {'type': 'loss', 'content': 0.10641464591026306, 'timestamp': '2025-10-01 04:40:03.315457', 'step': 16587, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:03.368818', 'step': 16587, 'epoch': 3} {'type': 'loss', 'content': 0.1109442189335823, 'timestamp': '2025-10-01 04:40:03.374735', 'step': 16588, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:03.428196', 'step': 16588, 'epoch': 3} {'type': 'loss', 'content': 0.04415971040725708, 'timestamp': '2025-10-01 04:40:03.438639', 'step': 16589, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:03.492688', 'step': 16589, 'epoch': 3} {'type': 'loss', 'content': 0.08175931870937347, 'timestamp': '2025-10-01 04:40:03.496681', 'step': 16590, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:03.553933', 'step': 16590, 'epoch': 3} {'type': 'loss', 'content': 0.10369210690259933, 'timestamp': '2025-10-01 04:40:03.556067', 'step': 16591, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:03.609698', 'step': 16591, 'epoch': 3} {'type': 'loss', 'content': 0.10206623375415802, 'timestamp': '2025-10-01 04:40:03.615768', 'step': 16592, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:40:03.670894', 'step': 16592, 'epoch': 3} {'type': 'loss', 'content': 0.060632988810539246, 'timestamp': '2025-10-01 04:40:03.673224', 'step': 16593, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:03.727782', 'step': 16593, 'epoch': 3} {'type': 'loss', 'content': 0.0834096297621727, 'timestamp': '2025-10-01 04:40:03.730253', 'step': 16594, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:03.784645', 'step': 16594, 'epoch': 3} {'type': 'loss', 'content': 0.13261467218399048, 'timestamp': '2025-10-01 04:40:03.786831', 'step': 16595, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:03.841136', 'step': 16595, 'epoch': 3} {'type': 'loss', 'content': 0.07062861323356628, 'timestamp': '2025-10-01 04:40:03.847186', 'step': 16596, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:03.900123', 'step': 16596, 'epoch': 3} {'type': 'loss', 'content': 0.06079111993312836, 'timestamp': '2025-10-01 04:40:03.906743', 'step': 16597, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:03.961271', 'step': 16597, 'epoch': 3} {'type': 'loss', 'content': 0.08469592779874802, 'timestamp': '2025-10-01 04:40:03.963238', 'step': 16598, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-01 04:40:04.028420', 'step': 16598, 'epoch': 3} {'type': 'loss', 'content': 0.09062158316373825, 'timestamp': '2025-10-01 04:40:04.030569', 'step': 16599, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:04.085700', 'step': 16599, 'epoch': 3} {'type': 'loss', 'content': 0.12556491792201996, 'timestamp': '2025-10-01 04:40:04.091550', 'step': 16600, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:04.144468', 'step': 16600, 'epoch': 3} {'type': 'loss', 'content': 0.08596111088991165, 'timestamp': '2025-10-01 04:40:04.147147', 'step': 16601, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:04.201231', 'step': 16601, 'epoch': 3} {'type': 'loss', 'content': 0.06209757179021835, 'timestamp': '2025-10-01 04:40:04.204420', 'step': 16602, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:40:04.258152', 'step': 16602, 'epoch': 3} {'type': 'loss', 'content': 0.057809337973594666, 'timestamp': '2025-10-01 04:40:04.260417', 'step': 16603, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:04.316365', 'step': 16603, 'epoch': 3} {'type': 'loss', 'content': 0.11462084949016571, 'timestamp': '2025-10-01 04:40:04.322360', 'step': 16604, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:04.376991', 'step': 16604, 'epoch': 3} {'type': 'loss', 'content': 0.17291200160980225, 'timestamp': '2025-10-01 04:40:04.379168', 'step': 16605, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:04.432821', 'step': 16605, 'epoch': 3} {'type': 'loss', 'content': 0.06935914605855942, 'timestamp': '2025-10-01 04:40:04.438406', 'step': 16606, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:04.500776', 'step': 16606, 'epoch': 3} {'type': 'loss', 'content': 0.20793232321739197, 'timestamp': '2025-10-01 04:40:04.503031', 'step': 16607, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:04.556776', 'step': 16607, 'epoch': 3} {'type': 'loss', 'content': 0.028439832851290703, 'timestamp': '2025-10-01 04:40:04.562662', 'step': 16608, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:04.616451', 'step': 16608, 'epoch': 3} {'type': 'loss', 'content': 0.04501105099916458, 'timestamp': '2025-10-01 04:40:04.626390', 'step': 16609, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:04.688637', 'step': 16609, 'epoch': 3} {'type': 'loss', 'content': 0.14379557967185974, 'timestamp': '2025-10-01 04:40:04.691373', 'step': 16610, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:40:04.754762', 'step': 16610, 'epoch': 3} {'type': 'loss', 'content': 0.1927962750196457, 'timestamp': '2025-10-01 04:40:04.757002', 'step': 16611, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:04.811728', 'step': 16611, 'epoch': 3} {'type': 'loss', 'content': 0.05861090496182442, 'timestamp': '2025-10-01 04:40:04.818363', 'step': 16612, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:04.871993', 'step': 16612, 'epoch': 3} {'type': 'loss', 'content': 0.15088389813899994, 'timestamp': '2025-10-01 04:40:04.875060', 'step': 16613, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:04.928704', 'step': 16613, 'epoch': 3} {'type': 'loss', 'content': 0.07917368412017822, 'timestamp': '2025-10-01 04:40:04.930902', 'step': 16614, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:40:04.984421', 'step': 16614, 'epoch': 3} {'type': 'loss', 'content': 0.07412614673376083, 'timestamp': '2025-10-01 04:40:04.989484', 'step': 16615, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:05.044704', 'step': 16615, 'epoch': 3} {'type': 'loss', 'content': 0.060331687331199646, 'timestamp': '2025-10-01 04:40:05.060501', 'step': 16616, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:05.113749', 'step': 16616, 'epoch': 3} {'type': 'loss', 'content': 0.09667525440454483, 'timestamp': '2025-10-01 04:40:05.116118', 'step': 16617, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:05.172463', 'step': 16617, 'epoch': 3} {'type': 'loss', 'content': 0.07555339485406876, 'timestamp': '2025-10-01 04:40:05.174711', 'step': 16618, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:05.230464', 'step': 16618, 'epoch': 3} {'type': 'loss', 'content': 0.05024333298206329, 'timestamp': '2025-10-01 04:40:05.238410', 'step': 16619, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:05.294840', 'step': 16619, 'epoch': 3} {'type': 'loss', 'content': 0.15142278373241425, 'timestamp': '2025-10-01 04:40:05.300902', 'step': 16620, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:40:05.353914', 'step': 16620, 'epoch': 3} {'type': 'loss', 'content': 0.047583963721990585, 'timestamp': '2025-10-01 04:40:05.357086', 'step': 16621, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:05.410502', 'step': 16621, 'epoch': 3} {'type': 'loss', 'content': 0.07742118835449219, 'timestamp': '2025-10-01 04:40:05.412786', 'step': 16622, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:05.466504', 'step': 16622, 'epoch': 3} {'type': 'loss', 'content': 0.06389794498682022, 'timestamp': '2025-10-01 04:40:05.469746', 'step': 16623, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:05.525094', 'step': 16623, 'epoch': 3} {'type': 'loss', 'content': 0.053812142461538315, 'timestamp': '2025-10-01 04:40:05.531035', 'step': 16624, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:05.584479', 'step': 16624, 'epoch': 3} {'type': 'loss', 'content': 0.21167771518230438, 'timestamp': '2025-10-01 04:40:05.586779', 'step': 16625, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:05.640014', 'step': 16625, 'epoch': 3} {'type': 'loss', 'content': 0.12208468466997147, 'timestamp': '2025-10-01 04:40:05.642480', 'step': 16626, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:40:05.697840', 'step': 16626, 'epoch': 3} {'type': 'loss', 'content': 0.06221260875463486, 'timestamp': '2025-10-01 04:40:05.700065', 'step': 16627, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:05.753076', 'step': 16627, 'epoch': 3} {'type': 'loss', 'content': 0.08886032551527023, 'timestamp': '2025-10-01 04:40:05.758965', 'step': 16628, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:05.811597', 'step': 16628, 'epoch': 3} {'type': 'loss', 'content': 0.0630655288696289, 'timestamp': '2025-10-01 04:40:05.813705', 'step': 16629, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:05.877893', 'step': 16629, 'epoch': 3} {'type': 'loss', 'content': 0.09308964759111404, 'timestamp': '2025-10-01 04:40:05.880042', 'step': 16630, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:40:05.934039', 'step': 16630, 'epoch': 3} {'type': 'loss', 'content': 0.08348304778337479, 'timestamp': '2025-10-01 04:40:05.936213', 'step': 16631, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:05.997309', 'step': 16631, 'epoch': 3} {'type': 'loss', 'content': 0.08582994341850281, 'timestamp': '2025-10-01 04:40:06.003983', 'step': 16632, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:06.057525', 'step': 16632, 'epoch': 3} {'type': 'loss', 'content': 0.03254259005188942, 'timestamp': '2025-10-01 04:40:06.060522', 'step': 16633, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:06.115211', 'step': 16633, 'epoch': 3} {'type': 'loss', 'content': 0.17080940306186676, 'timestamp': '2025-10-01 04:40:06.117425', 'step': 16634, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:06.170766', 'step': 16634, 'epoch': 3} {'type': 'loss', 'content': 0.17667745053768158, 'timestamp': '2025-10-01 04:40:06.172982', 'step': 16635, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:06.226079', 'step': 16635, 'epoch': 3} {'type': 'loss', 'content': 0.04297402873635292, 'timestamp': '2025-10-01 04:40:06.231913', 'step': 16636, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:06.285493', 'step': 16636, 'epoch': 3} {'type': 'loss', 'content': 0.05658166483044624, 'timestamp': '2025-10-01 04:40:06.287924', 'step': 16637, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:06.341344', 'step': 16637, 'epoch': 3} {'type': 'loss', 'content': 0.10769861936569214, 'timestamp': '2025-10-01 04:40:06.346520', 'step': 16638, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:06.400412', 'step': 16638, 'epoch': 3} {'type': 'loss', 'content': 0.22096623480319977, 'timestamp': '2025-10-01 04:40:06.402697', 'step': 16639, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:06.456259', 'step': 16639, 'epoch': 3} {'type': 'loss', 'content': 0.060997720807790756, 'timestamp': '2025-10-01 04:40:06.462701', 'step': 16640, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:06.515257', 'step': 16640, 'epoch': 3} {'type': 'loss', 'content': 0.09867475181818008, 'timestamp': '2025-10-01 04:40:06.518110', 'step': 16641, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:06.571520', 'step': 16641, 'epoch': 3} {'type': 'loss', 'content': 0.1149047389626503, 'timestamp': '2025-10-01 04:40:06.573777', 'step': 16642, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:06.627917', 'step': 16642, 'epoch': 3} {'type': 'loss', 'content': 0.14242064952850342, 'timestamp': '2025-10-01 04:40:06.630243', 'step': 16643, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:06.684342', 'step': 16643, 'epoch': 3} {'type': 'loss', 'content': 0.10115248709917068, 'timestamp': '2025-10-01 04:40:06.690711', 'step': 16644, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:06.753968', 'step': 16644, 'epoch': 3} {'type': 'loss', 'content': 0.05706474184989929, 'timestamp': '2025-10-01 04:40:06.756613', 'step': 16645, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:06.811940', 'step': 16645, 'epoch': 3} {'type': 'loss', 'content': 0.099913589656353, 'timestamp': '2025-10-01 04:40:06.814156', 'step': 16646, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:06.870166', 'step': 16646, 'epoch': 3} {'type': 'loss', 'content': 0.09699349105358124, 'timestamp': '2025-10-01 04:40:06.879534', 'step': 16647, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:06.934789', 'step': 16647, 'epoch': 3} {'type': 'loss', 'content': 0.06457758694887161, 'timestamp': '2025-10-01 04:40:06.941538', 'step': 16648, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:06.995685', 'step': 16648, 'epoch': 3} {'type': 'loss', 'content': 0.14243458211421967, 'timestamp': '2025-10-01 04:40:06.997991', 'step': 16649, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:07.053039', 'step': 16649, 'epoch': 3} {'type': 'loss', 'content': 0.10478067398071289, 'timestamp': '2025-10-01 04:40:07.055253', 'step': 16650, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:07.128325', 'step': 16650, 'epoch': 3} {'type': 'loss', 'content': 0.05213608220219612, 'timestamp': '2025-10-01 04:40:07.130546', 'step': 16651, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:07.185232', 'step': 16651, 'epoch': 3} {'type': 'loss', 'content': 0.09392877668142319, 'timestamp': '2025-10-01 04:40:07.191298', 'step': 16652, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:40:07.245201', 'step': 16652, 'epoch': 3} {'type': 'loss', 'content': 0.14649350941181183, 'timestamp': '2025-10-01 04:40:07.247820', 'step': 16653, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:07.303289', 'step': 16653, 'epoch': 3} {'type': 'loss', 'content': 0.1109638661146164, 'timestamp': '2025-10-01 04:40:07.305625', 'step': 16654, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:07.359258', 'step': 16654, 'epoch': 3} {'type': 'loss', 'content': 0.11396986991167068, 'timestamp': '2025-10-01 04:40:07.361549', 'step': 16655, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:07.415420', 'step': 16655, 'epoch': 3} {'type': 'loss', 'content': 0.10876297950744629, 'timestamp': '2025-10-01 04:40:07.421601', 'step': 16656, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:07.474472', 'step': 16656, 'epoch': 3} {'type': 'loss', 'content': 0.038210153579711914, 'timestamp': '2025-10-01 04:40:07.476594', 'step': 16657, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:07.529720', 'step': 16657, 'epoch': 3} {'type': 'loss', 'content': 0.11281585693359375, 'timestamp': '2025-10-01 04:40:07.531859', 'step': 16658, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:07.585533', 'step': 16658, 'epoch': 3} {'type': 'loss', 'content': 0.12405867129564285, 'timestamp': '2025-10-01 04:40:07.587890', 'step': 16659, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:07.641400', 'step': 16659, 'epoch': 3} {'type': 'loss', 'content': 0.19432885944843292, 'timestamp': '2025-10-01 04:40:07.655070', 'step': 16660, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:07.708445', 'step': 16660, 'epoch': 3} {'type': 'loss', 'content': 0.0884435623884201, 'timestamp': '2025-10-01 04:40:07.710804', 'step': 16661, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:07.764345', 'step': 16661, 'epoch': 3} {'type': 'loss', 'content': 0.06110069528222084, 'timestamp': '2025-10-01 04:40:07.766778', 'step': 16662, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:07.820570', 'step': 16662, 'epoch': 3} {'type': 'loss', 'content': 0.0683966875076294, 'timestamp': '2025-10-01 04:40:07.822775', 'step': 16663, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:07.876387', 'step': 16663, 'epoch': 3} {'type': 'loss', 'content': 0.04581344127655029, 'timestamp': '2025-10-01 04:40:07.882951', 'step': 16664, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:07.936658', 'step': 16664, 'epoch': 3} {'type': 'loss', 'content': 0.13473519682884216, 'timestamp': '2025-10-01 04:40:07.939204', 'step': 16665, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:07.993018', 'step': 16665, 'epoch': 3} {'type': 'loss', 'content': 0.10669030994176865, 'timestamp': '2025-10-01 04:40:07.995662', 'step': 16666, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:08.050849', 'step': 16666, 'epoch': 3} {'type': 'loss', 'content': 0.0566820427775383, 'timestamp': '2025-10-01 04:40:08.053538', 'step': 16667, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:08.109257', 'step': 16667, 'epoch': 3} {'type': 'loss', 'content': 0.0747590884566307, 'timestamp': '2025-10-01 04:40:08.115735', 'step': 16668, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:08.182527', 'step': 16668, 'epoch': 3} {'type': 'loss', 'content': 0.06194276735186577, 'timestamp': '2025-10-01 04:40:08.184799', 'step': 16669, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:08.239984', 'step': 16669, 'epoch': 3} {'type': 'loss', 'content': 0.10273675620555878, 'timestamp': '2025-10-01 04:40:08.242634', 'step': 16670, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:08.298452', 'step': 16670, 'epoch': 3} {'type': 'loss', 'content': 0.13061769306659698, 'timestamp': '2025-10-01 04:40:08.306382', 'step': 16671, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:08.360078', 'step': 16671, 'epoch': 3} {'type': 'loss', 'content': 0.10926050692796707, 'timestamp': '2025-10-01 04:40:08.367156', 'step': 16672, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:08.422875', 'step': 16672, 'epoch': 3} {'type': 'loss', 'content': 0.10340555012226105, 'timestamp': '2025-10-01 04:40:08.425052', 'step': 16673, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:08.486056', 'step': 16673, 'epoch': 3} {'type': 'loss', 'content': 0.1076197549700737, 'timestamp': '2025-10-01 04:40:08.488538', 'step': 16674, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:08.549781', 'step': 16674, 'epoch': 3} {'type': 'loss', 'content': 0.056798629462718964, 'timestamp': '2025-10-01 04:40:08.552244', 'step': 16675, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:08.611229', 'step': 16675, 'epoch': 3} {'type': 'loss', 'content': 0.04665940999984741, 'timestamp': '2025-10-01 04:40:08.618514', 'step': 16676, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:08.679261', 'step': 16676, 'epoch': 3} {'type': 'loss', 'content': 0.0674792155623436, 'timestamp': '2025-10-01 04:40:08.681685', 'step': 16677, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:08.743649', 'step': 16677, 'epoch': 3} {'type': 'loss', 'content': 0.056996945291757584, 'timestamp': '2025-10-01 04:40:08.746392', 'step': 16678, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:08.807833', 'step': 16678, 'epoch': 3} {'type': 'loss', 'content': 0.06439784169197083, 'timestamp': '2025-10-01 04:40:08.811417', 'step': 16679, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:40:08.873140', 'step': 16679, 'epoch': 3} {'type': 'loss', 'content': 0.12019071727991104, 'timestamp': '2025-10-01 04:40:08.880800', 'step': 16680, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:08.941076', 'step': 16680, 'epoch': 3} {'type': 'loss', 'content': 0.09516485035419464, 'timestamp': '2025-10-01 04:40:08.944294', 'step': 16681, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:09.007042', 'step': 16681, 'epoch': 3} {'type': 'loss', 'content': 0.10093013942241669, 'timestamp': '2025-10-01 04:40:09.010043', 'step': 16682, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:09.073234', 'step': 16682, 'epoch': 3} {'type': 'loss', 'content': 0.10420510172843933, 'timestamp': '2025-10-01 04:40:09.076137', 'step': 16683, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:09.138822', 'step': 16683, 'epoch': 3} {'type': 'loss', 'content': 0.15075647830963135, 'timestamp': '2025-10-01 04:40:09.146450', 'step': 16684, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:09.207947', 'step': 16684, 'epoch': 3} {'type': 'loss', 'content': 0.10380621254444122, 'timestamp': '2025-10-01 04:40:09.210810', 'step': 16685, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:40:09.275429', 'step': 16685, 'epoch': 3} {'type': 'loss', 'content': 0.07698766887187958, 'timestamp': '2025-10-01 04:40:09.282882', 'step': 16686, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:09.346275', 'step': 16686, 'epoch': 3} {'type': 'loss', 'content': 0.07869397848844528, 'timestamp': '2025-10-01 04:40:09.350297', 'step': 16687, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:09.416132', 'step': 16687, 'epoch': 3} {'type': 'loss', 'content': 0.10323415696620941, 'timestamp': '2025-10-01 04:40:09.423553', 'step': 16688, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:09.483226', 'step': 16688, 'epoch': 3} {'type': 'loss', 'content': 0.06983893364667892, 'timestamp': '2025-10-01 04:40:09.486222', 'step': 16689, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:09.547589', 'step': 16689, 'epoch': 3} {'type': 'loss', 'content': 0.11235848069190979, 'timestamp': '2025-10-01 04:40:09.550483', 'step': 16690, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:09.612190', 'step': 16690, 'epoch': 3} {'type': 'loss', 'content': 0.0789044201374054, 'timestamp': '2025-10-01 04:40:09.615061', 'step': 16691, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:09.677105', 'step': 16691, 'epoch': 3} {'type': 'loss', 'content': 0.05161822587251663, 'timestamp': '2025-10-01 04:40:09.684883', 'step': 16692, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:09.745227', 'step': 16692, 'epoch': 3} {'type': 'loss', 'content': 0.09551694244146347, 'timestamp': '2025-10-01 04:40:09.748131', 'step': 16693, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:09.807501', 'step': 16693, 'epoch': 3} {'type': 'loss', 'content': 0.14143620431423187, 'timestamp': '2025-10-01 04:40:09.809919', 'step': 16694, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:09.866932', 'step': 16694, 'epoch': 3} {'type': 'loss', 'content': 0.1529230922460556, 'timestamp': '2025-10-01 04:40:09.869859', 'step': 16695, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:09.926036', 'step': 16695, 'epoch': 3} {'type': 'loss', 'content': 0.1656571477651596, 'timestamp': '2025-10-01 04:40:09.932794', 'step': 16696, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:09.987123', 'step': 16696, 'epoch': 3} {'type': 'loss', 'content': 0.11656837910413742, 'timestamp': '2025-10-01 04:40:09.989898', 'step': 16697, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:10.045099', 'step': 16697, 'epoch': 3} {'type': 'loss', 'content': 0.1360660344362259, 'timestamp': '2025-10-01 04:40:10.048035', 'step': 16698, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:10.102401', 'step': 16698, 'epoch': 3} {'type': 'loss', 'content': 0.08245319128036499, 'timestamp': '2025-10-01 04:40:10.105096', 'step': 16699, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:10.159526', 'step': 16699, 'epoch': 3} {'type': 'loss', 'content': 0.07443274557590485, 'timestamp': '2025-10-01 04:40:10.165938', 'step': 16700, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:10.220100', 'step': 16700, 'epoch': 3} {'type': 'loss', 'content': 0.13674764335155487, 'timestamp': '2025-10-01 04:40:10.222929', 'step': 16701, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:10.277460', 'step': 16701, 'epoch': 3} {'type': 'loss', 'content': 0.08821741491556168, 'timestamp': '2025-10-01 04:40:10.281770', 'step': 16702, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:10.338928', 'step': 16702, 'epoch': 3} {'type': 'loss', 'content': 0.08152125775814056, 'timestamp': '2025-10-01 04:40:10.341726', 'step': 16703, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:10.396550', 'step': 16703, 'epoch': 3} {'type': 'loss', 'content': 0.07207129150629044, 'timestamp': '2025-10-01 04:40:10.403520', 'step': 16704, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:10.457693', 'step': 16704, 'epoch': 3} {'type': 'loss', 'content': 0.12805578112602234, 'timestamp': '2025-10-01 04:40:10.460934', 'step': 16705, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:10.514754', 'step': 16705, 'epoch': 3} {'type': 'loss', 'content': 0.03553245961666107, 'timestamp': '2025-10-01 04:40:10.517052', 'step': 16706, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:10.570987', 'step': 16706, 'epoch': 3} {'type': 'loss', 'content': 0.0828263908624649, 'timestamp': '2025-10-01 04:40:10.573655', 'step': 16707, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:10.630316', 'step': 16707, 'epoch': 3} {'type': 'loss', 'content': 0.09384176135063171, 'timestamp': '2025-10-01 04:40:10.636739', 'step': 16708, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:10.690933', 'step': 16708, 'epoch': 3} {'type': 'loss', 'content': 0.07524611055850983, 'timestamp': '2025-10-01 04:40:10.693382', 'step': 16709, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:10.746965', 'step': 16709, 'epoch': 3} {'type': 'loss', 'content': 0.1369468718767166, 'timestamp': '2025-10-01 04:40:10.749416', 'step': 16710, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:10.803116', 'step': 16710, 'epoch': 3} {'type': 'loss', 'content': 0.08488805592060089, 'timestamp': '2025-10-01 04:40:10.805901', 'step': 16711, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:10.862232', 'step': 16711, 'epoch': 3} {'type': 'loss', 'content': 0.1167026236653328, 'timestamp': '2025-10-01 04:40:10.868251', 'step': 16712, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:10.921949', 'step': 16712, 'epoch': 3} {'type': 'loss', 'content': 0.06299596279859543, 'timestamp': '2025-10-01 04:40:10.924100', 'step': 16713, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:10.978508', 'step': 16713, 'epoch': 3} {'type': 'loss', 'content': 0.12331594526767731, 'timestamp': '2025-10-01 04:40:10.982171', 'step': 16714, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:11.036315', 'step': 16714, 'epoch': 3} {'type': 'loss', 'content': 0.06335347145795822, 'timestamp': '2025-10-01 04:40:11.038573', 'step': 16715, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:11.093009', 'step': 16715, 'epoch': 3} {'type': 'loss', 'content': 0.13527539372444153, 'timestamp': '2025-10-01 04:40:11.098946', 'step': 16716, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:11.152902', 'step': 16716, 'epoch': 3} {'type': 'loss', 'content': 0.21623660624027252, 'timestamp': '2025-10-01 04:40:11.155155', 'step': 16717, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:40:11.208372', 'step': 16717, 'epoch': 3} {'type': 'loss', 'content': 0.17784284055233002, 'timestamp': '2025-10-01 04:40:11.212523', 'step': 16718, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:11.266078', 'step': 16718, 'epoch': 3} {'type': 'loss', 'content': 0.12484105676412582, 'timestamp': '2025-10-01 04:40:11.268390', 'step': 16719, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:11.322697', 'step': 16719, 'epoch': 3} {'type': 'loss', 'content': 0.0736284926533699, 'timestamp': '2025-10-01 04:40:11.329730', 'step': 16720, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:11.390623', 'step': 16720, 'epoch': 3} {'type': 'loss', 'content': 0.09231466054916382, 'timestamp': '2025-10-01 04:40:11.393238', 'step': 16721, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:11.446260', 'step': 16721, 'epoch': 3} {'type': 'loss', 'content': 0.06613611429929733, 'timestamp': '2025-10-01 04:40:11.448611', 'step': 16722, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:11.502851', 'step': 16722, 'epoch': 3} {'type': 'loss', 'content': 0.0950547605752945, 'timestamp': '2025-10-01 04:40:11.505772', 'step': 16723, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:11.559677', 'step': 16723, 'epoch': 3} {'type': 'loss', 'content': 0.14045462012290955, 'timestamp': '2025-10-01 04:40:11.565684', 'step': 16724, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:11.619013', 'step': 16724, 'epoch': 3} {'type': 'loss', 'content': 0.1591515988111496, 'timestamp': '2025-10-01 04:40:11.621324', 'step': 16725, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:11.674489', 'step': 16725, 'epoch': 3} {'type': 'loss', 'content': 0.16586856544017792, 'timestamp': '2025-10-01 04:40:11.676834', 'step': 16726, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:11.732426', 'step': 16726, 'epoch': 3} {'type': 'loss', 'content': 0.12207555770874023, 'timestamp': '2025-10-01 04:40:11.734777', 'step': 16727, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:40:11.788634', 'step': 16727, 'epoch': 3} {'type': 'loss', 'content': 0.08813318610191345, 'timestamp': '2025-10-01 04:40:11.794617', 'step': 16728, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:11.847434', 'step': 16728, 'epoch': 3} {'type': 'loss', 'content': 0.06535055488348007, 'timestamp': '2025-10-01 04:40:11.849650', 'step': 16729, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:40:11.903282', 'step': 16729, 'epoch': 3} {'type': 'loss', 'content': 0.12534748017787933, 'timestamp': '2025-10-01 04:40:11.906279', 'step': 16730, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:11.960177', 'step': 16730, 'epoch': 3} {'type': 'loss', 'content': 0.047951456159353256, 'timestamp': '2025-10-01 04:40:11.961897', 'step': 16731, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:12.015355', 'step': 16731, 'epoch': 3} {'type': 'loss', 'content': 0.08577464520931244, 'timestamp': '2025-10-01 04:40:12.021291', 'step': 16732, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:12.082550', 'step': 16732, 'epoch': 3} {'type': 'loss', 'content': 0.08463654667139053, 'timestamp': '2025-10-01 04:40:12.085346', 'step': 16733, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:40:12.139403', 'step': 16733, 'epoch': 3} {'type': 'loss', 'content': 0.047375600785017014, 'timestamp': '2025-10-01 04:40:12.141580', 'step': 16734, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:12.195933', 'step': 16734, 'epoch': 3} {'type': 'loss', 'content': 0.08078091591596603, 'timestamp': '2025-10-01 04:40:12.198316', 'step': 16735, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:12.253446', 'step': 16735, 'epoch': 3} {'type': 'loss', 'content': 0.09930354356765747, 'timestamp': '2025-10-01 04:40:12.259267', 'step': 16736, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:12.312506', 'step': 16736, 'epoch': 3} {'type': 'loss', 'content': 0.07986649125814438, 'timestamp': '2025-10-01 04:40:12.314782', 'step': 16737, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:12.368190', 'step': 16737, 'epoch': 3} {'type': 'loss', 'content': 0.04255073890089989, 'timestamp': '2025-10-01 04:40:12.370223', 'step': 16738, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:12.423904', 'step': 16738, 'epoch': 3} {'type': 'loss', 'content': 0.14610056579113007, 'timestamp': '2025-10-01 04:40:12.426835', 'step': 16739, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:12.485568', 'step': 16739, 'epoch': 3} {'type': 'loss', 'content': 0.12310484051704407, 'timestamp': '2025-10-01 04:40:12.491424', 'step': 16740, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:12.544993', 'step': 16740, 'epoch': 3} {'type': 'loss', 'content': 0.04211030900478363, 'timestamp': '2025-10-01 04:40:12.547276', 'step': 16741, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:12.603132', 'step': 16741, 'epoch': 3} {'type': 'loss', 'content': 0.12489477545022964, 'timestamp': '2025-10-01 04:40:12.605384', 'step': 16742, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:12.658635', 'step': 16742, 'epoch': 3} {'type': 'loss', 'content': 0.10928511619567871, 'timestamp': '2025-10-01 04:40:12.660884', 'step': 16743, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:12.714802', 'step': 16743, 'epoch': 3} {'type': 'loss', 'content': 0.09320464730262756, 'timestamp': '2025-10-01 04:40:12.720263', 'step': 16744, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:40:12.773551', 'step': 16744, 'epoch': 3} {'type': 'loss', 'content': 0.15425343811511993, 'timestamp': '2025-10-01 04:40:12.775665', 'step': 16745, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:12.836459', 'step': 16745, 'epoch': 3} {'type': 'loss', 'content': 0.07796044647693634, 'timestamp': '2025-10-01 04:40:12.839094', 'step': 16746, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:12.892955', 'step': 16746, 'epoch': 3} {'type': 'loss', 'content': 0.13982157409191132, 'timestamp': '2025-10-01 04:40:12.895235', 'step': 16747, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:12.950955', 'step': 16747, 'epoch': 3} {'type': 'loss', 'content': 0.06196380779147148, 'timestamp': '2025-10-01 04:40:12.956814', 'step': 16748, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:13.009905', 'step': 16748, 'epoch': 3} {'type': 'loss', 'content': 0.11063270270824432, 'timestamp': '2025-10-01 04:40:13.012172', 'step': 16749, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:13.065801', 'step': 16749, 'epoch': 3} {'type': 'loss', 'content': 0.03230004385113716, 'timestamp': '2025-10-01 04:40:13.068112', 'step': 16750, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:13.121947', 'step': 16750, 'epoch': 3} {'type': 'loss', 'content': 0.13781149685382843, 'timestamp': '2025-10-01 04:40:13.124060', 'step': 16751, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:13.179174', 'step': 16751, 'epoch': 3} {'type': 'loss', 'content': 0.1339264214038849, 'timestamp': '2025-10-01 04:40:13.193521', 'step': 16752, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:13.246909', 'step': 16752, 'epoch': 3} {'type': 'loss', 'content': 0.14113612473011017, 'timestamp': '2025-10-01 04:40:13.249239', 'step': 16753, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:13.311652', 'step': 16753, 'epoch': 3} {'type': 'loss', 'content': 0.05909299850463867, 'timestamp': '2025-10-01 04:40:13.313875', 'step': 16754, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:13.367467', 'step': 16754, 'epoch': 3} {'type': 'loss', 'content': 0.05263702943921089, 'timestamp': '2025-10-01 04:40:13.369727', 'step': 16755, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:13.427440', 'step': 16755, 'epoch': 3} {'type': 'loss', 'content': 0.14419493079185486, 'timestamp': '2025-10-01 04:40:13.450561', 'step': 16756, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:13.509657', 'step': 16756, 'epoch': 3} {'type': 'loss', 'content': 0.027122069150209427, 'timestamp': '2025-10-01 04:40:13.511686', 'step': 16757, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:40:13.565892', 'step': 16757, 'epoch': 3} {'type': 'loss', 'content': 0.14711999893188477, 'timestamp': '2025-10-01 04:40:13.567779', 'step': 16758, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:13.620810', 'step': 16758, 'epoch': 3} {'type': 'loss', 'content': 0.12308911234140396, 'timestamp': '2025-10-01 04:40:13.623817', 'step': 16759, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:13.678480', 'step': 16759, 'epoch': 3} {'type': 'loss', 'content': 0.175772562623024, 'timestamp': '2025-10-01 04:40:13.684280', 'step': 16760, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:13.738204', 'step': 16760, 'epoch': 3} {'type': 'loss', 'content': 0.034109994769096375, 'timestamp': '2025-10-01 04:40:13.740388', 'step': 16761, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:13.793326', 'step': 16761, 'epoch': 3} {'type': 'loss', 'content': 0.07848720997571945, 'timestamp': '2025-10-01 04:40:13.795827', 'step': 16762, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:13.849813', 'step': 16762, 'epoch': 3} {'type': 'loss', 'content': 0.06067532300949097, 'timestamp': '2025-10-01 04:40:13.852049', 'step': 16763, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:13.906162', 'step': 16763, 'epoch': 3} {'type': 'loss', 'content': 0.05726075544953346, 'timestamp': '2025-10-01 04:40:13.919329', 'step': 16764, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:13.972295', 'step': 16764, 'epoch': 3} {'type': 'loss', 'content': 0.0875694528222084, 'timestamp': '2025-10-01 04:40:13.974314', 'step': 16765, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:14.027853', 'step': 16765, 'epoch': 3} {'type': 'loss', 'content': 0.09259895980358124, 'timestamp': '2025-10-01 04:40:14.030016', 'step': 16766, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:14.098231', 'step': 16766, 'epoch': 3} {'type': 'loss', 'content': 0.0873660147190094, 'timestamp': '2025-10-01 04:40:14.100607', 'step': 16767, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:14.166243', 'step': 16767, 'epoch': 3} {'type': 'loss', 'content': 0.08507940173149109, 'timestamp': '2025-10-01 04:40:14.172126', 'step': 16768, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:14.224789', 'step': 16768, 'epoch': 3} {'type': 'loss', 'content': 0.10099522769451141, 'timestamp': '2025-10-01 04:40:14.233772', 'step': 16769, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:14.290233', 'step': 16769, 'epoch': 3} {'type': 'loss', 'content': 0.07349244505167007, 'timestamp': '2025-10-01 04:40:14.292396', 'step': 16770, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:14.346415', 'step': 16770, 'epoch': 3} {'type': 'loss', 'content': 0.057094890624284744, 'timestamp': '2025-10-01 04:40:14.349973', 'step': 16771, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:14.410219', 'step': 16771, 'epoch': 3} {'type': 'loss', 'content': 0.14267374575138092, 'timestamp': '2025-10-01 04:40:14.416038', 'step': 16772, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:14.468876', 'step': 16772, 'epoch': 3} {'type': 'loss', 'content': 0.11166369915008545, 'timestamp': '2025-10-01 04:40:14.471443', 'step': 16773, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:14.525084', 'step': 16773, 'epoch': 3} {'type': 'loss', 'content': 0.09006818383932114, 'timestamp': '2025-10-01 04:40:14.537956', 'step': 16774, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:14.602289', 'step': 16774, 'epoch': 3} {'type': 'loss', 'content': 0.05831724777817726, 'timestamp': '2025-10-01 04:40:14.604837', 'step': 16775, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:14.658384', 'step': 16775, 'epoch': 3} {'type': 'loss', 'content': 0.07825551927089691, 'timestamp': '2025-10-01 04:40:14.664229', 'step': 16776, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-01 04:40:27.775153', 'step': 16776, 'epoch': 3} {'type': 'pplx', 'content': 11525.04012160292, 'timestamp': '2025-10-01 04:40:27.778495', 'step': 16776, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:27.832942', 'step': 16776, 'epoch': 3} {'type': 'loss', 'content': 0.07623253017663956, 'timestamp': '2025-10-01 04:40:27.835284', 'step': 16777, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:27.890436', 'step': 16777, 'epoch': 3} {'type': 'loss', 'content': 0.04051654413342476, 'timestamp': '2025-10-01 04:40:27.893249', 'step': 16778, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:27.947655', 'step': 16778, 'epoch': 3} {'type': 'loss', 'content': 0.06356155872344971, 'timestamp': '2025-10-01 04:40:27.950234', 'step': 16779, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:28.004559', 'step': 16779, 'epoch': 3} {'type': 'loss', 'content': 0.09341466426849365, 'timestamp': '2025-10-01 04:40:28.011127', 'step': 16780, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:28.064307', 'step': 16780, 'epoch': 3} {'type': 'loss', 'content': 0.07849936932325363, 'timestamp': '2025-10-01 04:40:28.068046', 'step': 16781, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:28.124327', 'step': 16781, 'epoch': 3} {'type': 'loss', 'content': 0.09232057631015778, 'timestamp': '2025-10-01 04:40:28.126972', 'step': 16782, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:28.182102', 'step': 16782, 'epoch': 3} {'type': 'loss', 'content': 0.09003565460443497, 'timestamp': '2025-10-01 04:40:28.184974', 'step': 16783, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:28.245864', 'step': 16783, 'epoch': 3} {'type': 'loss', 'content': 0.12599806487560272, 'timestamp': '2025-10-01 04:40:28.252281', 'step': 16784, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:28.306123', 'step': 16784, 'epoch': 3} {'type': 'loss', 'content': 0.12575308978557587, 'timestamp': '2025-10-01 04:40:28.308810', 'step': 16785, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:28.363592', 'step': 16785, 'epoch': 3} {'type': 'loss', 'content': 0.1088658794760704, 'timestamp': '2025-10-01 04:40:28.366162', 'step': 16786, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:28.420781', 'step': 16786, 'epoch': 3} {'type': 'loss', 'content': 0.07526509463787079, 'timestamp': '2025-10-01 04:40:28.423087', 'step': 16787, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:28.477547', 'step': 16787, 'epoch': 3} {'type': 'loss', 'content': 0.1472402960062027, 'timestamp': '2025-10-01 04:40:28.483696', 'step': 16788, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:28.543196', 'step': 16788, 'epoch': 3} {'type': 'loss', 'content': 0.07838771492242813, 'timestamp': '2025-10-01 04:40:28.545628', 'step': 16789, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:28.600613', 'step': 16789, 'epoch': 3} {'type': 'loss', 'content': 0.06723600625991821, 'timestamp': '2025-10-01 04:40:28.603935', 'step': 16790, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:28.657672', 'step': 16790, 'epoch': 3} {'type': 'loss', 'content': 0.13400113582611084, 'timestamp': '2025-10-01 04:40:28.660078', 'step': 16791, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:28.713093', 'step': 16791, 'epoch': 3} {'type': 'loss', 'content': 0.08827080577611923, 'timestamp': '2025-10-01 04:40:28.719125', 'step': 16792, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:28.773876', 'step': 16792, 'epoch': 3} {'type': 'loss', 'content': 0.12590931355953217, 'timestamp': '2025-10-01 04:40:28.776072', 'step': 16793, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:28.829189', 'step': 16793, 'epoch': 3} {'type': 'loss', 'content': 0.04095067083835602, 'timestamp': '2025-10-01 04:40:28.831367', 'step': 16794, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:28.885695', 'step': 16794, 'epoch': 3} {'type': 'loss', 'content': 0.055746715515851974, 'timestamp': '2025-10-01 04:40:28.888129', 'step': 16795, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:28.945328', 'step': 16795, 'epoch': 3} {'type': 'loss', 'content': 0.07208698242902756, 'timestamp': '2025-10-01 04:40:28.951269', 'step': 16796, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:29.005355', 'step': 16796, 'epoch': 3} {'type': 'loss', 'content': 0.10604305565357208, 'timestamp': '2025-10-01 04:40:29.008282', 'step': 16797, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:29.061642', 'step': 16797, 'epoch': 3} {'type': 'loss', 'content': 0.06126796826720238, 'timestamp': '2025-10-01 04:40:29.063862', 'step': 16798, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:29.117863', 'step': 16798, 'epoch': 3} {'type': 'loss', 'content': 0.06991273909807205, 'timestamp': '2025-10-01 04:40:29.120061', 'step': 16799, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:29.175198', 'step': 16799, 'epoch': 3} {'type': 'loss', 'content': 0.06646698713302612, 'timestamp': '2025-10-01 04:40:29.181290', 'step': 16800, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:29.239199', 'step': 16800, 'epoch': 3} {'type': 'loss', 'content': 0.10884139686822891, 'timestamp': '2025-10-01 04:40:29.241542', 'step': 16801, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:29.296029', 'step': 16801, 'epoch': 3} {'type': 'loss', 'content': 0.04162120074033737, 'timestamp': '2025-10-01 04:40:29.298199', 'step': 16802, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:29.353590', 'step': 16802, 'epoch': 3} {'type': 'loss', 'content': 0.1255841702222824, 'timestamp': '2025-10-01 04:40:29.355872', 'step': 16803, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:29.417996', 'step': 16803, 'epoch': 3} {'type': 'loss', 'content': 0.09918183088302612, 'timestamp': '2025-10-01 04:40:29.424383', 'step': 16804, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:29.480142', 'step': 16804, 'epoch': 3} {'type': 'loss', 'content': 0.054737865924835205, 'timestamp': '2025-10-01 04:40:29.482764', 'step': 16805, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:29.537035', 'step': 16805, 'epoch': 3} {'type': 'loss', 'content': 0.08582230657339096, 'timestamp': '2025-10-01 04:40:29.539371', 'step': 16806, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:29.592875', 'step': 16806, 'epoch': 3} {'type': 'loss', 'content': 0.09898295253515244, 'timestamp': '2025-10-01 04:40:29.595759', 'step': 16807, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:29.650107', 'step': 16807, 'epoch': 3} {'type': 'loss', 'content': 0.09652917832136154, 'timestamp': '2025-10-01 04:40:29.658868', 'step': 16808, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:29.713779', 'step': 16808, 'epoch': 3} {'type': 'loss', 'content': 0.050540097057819366, 'timestamp': '2025-10-01 04:40:29.718702', 'step': 16809, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:29.774121', 'step': 16809, 'epoch': 3} {'type': 'loss', 'content': 0.1435726433992386, 'timestamp': '2025-10-01 04:40:29.776243', 'step': 16810, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:40:29.832188', 'step': 16810, 'epoch': 3} {'type': 'loss', 'content': 0.05367916822433472, 'timestamp': '2025-10-01 04:40:29.834847', 'step': 16811, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:29.889305', 'step': 16811, 'epoch': 3} {'type': 'loss', 'content': 0.05701892077922821, 'timestamp': '2025-10-01 04:40:29.904538', 'step': 16812, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:29.957843', 'step': 16812, 'epoch': 3} {'type': 'loss', 'content': 0.057471223175525665, 'timestamp': '2025-10-01 04:40:29.960508', 'step': 16813, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:30.013765', 'step': 16813, 'epoch': 3} {'type': 'loss', 'content': 0.06889908015727997, 'timestamp': '2025-10-01 04:40:30.016048', 'step': 16814, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:30.069958', 'step': 16814, 'epoch': 3} {'type': 'loss', 'content': 0.0902380421757698, 'timestamp': '2025-10-01 04:40:30.072272', 'step': 16815, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:30.126918', 'step': 16815, 'epoch': 3} {'type': 'loss', 'content': 0.12977862358093262, 'timestamp': '2025-10-01 04:40:30.137300', 'step': 16816, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:30.199149', 'step': 16816, 'epoch': 3} {'type': 'loss', 'content': 0.05042744427919388, 'timestamp': '2025-10-01 04:40:30.205810', 'step': 16817, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:30.259534', 'step': 16817, 'epoch': 3} {'type': 'loss', 'content': 0.05322200432419777, 'timestamp': '2025-10-01 04:40:30.261945', 'step': 16818, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:30.315761', 'step': 16818, 'epoch': 3} {'type': 'loss', 'content': 0.08593937754631042, 'timestamp': '2025-10-01 04:40:30.318174', 'step': 16819, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:30.372401', 'step': 16819, 'epoch': 3} {'type': 'loss', 'content': 0.09163197129964828, 'timestamp': '2025-10-01 04:40:30.382144', 'step': 16820, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:30.435377', 'step': 16820, 'epoch': 3} {'type': 'loss', 'content': 0.09027963131666183, 'timestamp': '2025-10-01 04:40:30.437607', 'step': 16821, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:30.491410', 'step': 16821, 'epoch': 3} {'type': 'loss', 'content': 0.13307130336761475, 'timestamp': '2025-10-01 04:40:30.493739', 'step': 16822, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:30.549570', 'step': 16822, 'epoch': 3} {'type': 'loss', 'content': 0.16424091160297394, 'timestamp': '2025-10-01 04:40:30.552200', 'step': 16823, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:30.606485', 'step': 16823, 'epoch': 3} {'type': 'loss', 'content': 0.16847775876522064, 'timestamp': '2025-10-01 04:40:30.612353', 'step': 16824, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:30.665112', 'step': 16824, 'epoch': 3} {'type': 'loss', 'content': 0.057848621159791946, 'timestamp': '2025-10-01 04:40:30.667598', 'step': 16825, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:30.728902', 'step': 16825, 'epoch': 3} {'type': 'loss', 'content': 0.1289455145597458, 'timestamp': '2025-10-01 04:40:30.731107', 'step': 16826, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:30.785822', 'step': 16826, 'epoch': 3} {'type': 'loss', 'content': 0.13258622586727142, 'timestamp': '2025-10-01 04:40:30.788017', 'step': 16827, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:30.841451', 'step': 16827, 'epoch': 3} {'type': 'loss', 'content': 0.11703712493181229, 'timestamp': '2025-10-01 04:40:30.847230', 'step': 16828, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:30.899908', 'step': 16828, 'epoch': 3} {'type': 'loss', 'content': 0.13435834646224976, 'timestamp': '2025-10-01 04:40:30.902082', 'step': 16829, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:30.955146', 'step': 16829, 'epoch': 3} {'type': 'loss', 'content': 0.0967513844370842, 'timestamp': '2025-10-01 04:40:30.957478', 'step': 16830, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:31.011540', 'step': 16830, 'epoch': 3} {'type': 'loss', 'content': 0.09069056808948517, 'timestamp': '2025-10-01 04:40:31.013824', 'step': 16831, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:31.067663', 'step': 16831, 'epoch': 3} {'type': 'loss', 'content': 0.09157836437225342, 'timestamp': '2025-10-01 04:40:31.074164', 'step': 16832, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:31.131203', 'step': 16832, 'epoch': 3} {'type': 'loss', 'content': 0.06166749447584152, 'timestamp': '2025-10-01 04:40:31.134348', 'step': 16833, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:31.190201', 'step': 16833, 'epoch': 3} {'type': 'loss', 'content': 0.08282341808080673, 'timestamp': '2025-10-01 04:40:31.192542', 'step': 16834, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:40:31.245712', 'step': 16834, 'epoch': 3} {'type': 'loss', 'content': 0.09599613398313522, 'timestamp': '2025-10-01 04:40:31.248102', 'step': 16835, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:31.302305', 'step': 16835, 'epoch': 3} {'type': 'loss', 'content': 0.09856797754764557, 'timestamp': '2025-10-01 04:40:31.308087', 'step': 16836, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:31.372776', 'step': 16836, 'epoch': 3} {'type': 'loss', 'content': 0.12705914676189423, 'timestamp': '2025-10-01 04:40:31.375372', 'step': 16837, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:31.429163', 'step': 16837, 'epoch': 3} {'type': 'loss', 'content': 0.12868820130825043, 'timestamp': '2025-10-01 04:40:31.438583', 'step': 16838, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:31.493107', 'step': 16838, 'epoch': 3} {'type': 'loss', 'content': 0.09905985742807388, 'timestamp': '2025-10-01 04:40:31.495288', 'step': 16839, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:31.548534', 'step': 16839, 'epoch': 3} {'type': 'loss', 'content': 0.07473761588335037, 'timestamp': '2025-10-01 04:40:31.554376', 'step': 16840, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:31.606695', 'step': 16840, 'epoch': 3} {'type': 'loss', 'content': 0.1564057469367981, 'timestamp': '2025-10-01 04:40:31.608959', 'step': 16841, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:31.662336', 'step': 16841, 'epoch': 3} {'type': 'loss', 'content': 0.04358208179473877, 'timestamp': '2025-10-01 04:40:31.664550', 'step': 16842, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:31.718033', 'step': 16842, 'epoch': 3} {'type': 'loss', 'content': 0.036895573139190674, 'timestamp': '2025-10-01 04:40:31.721287', 'step': 16843, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:31.774868', 'step': 16843, 'epoch': 3} {'type': 'loss', 'content': 0.17564116418361664, 'timestamp': '2025-10-01 04:40:31.780978', 'step': 16844, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:31.834450', 'step': 16844, 'epoch': 3} {'type': 'loss', 'content': 0.1132347360253334, 'timestamp': '2025-10-01 04:40:31.836639', 'step': 16845, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:31.890705', 'step': 16845, 'epoch': 3} {'type': 'loss', 'content': 0.1526220291852951, 'timestamp': '2025-10-01 04:40:31.893002', 'step': 16846, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:40:31.946367', 'step': 16846, 'epoch': 3} {'type': 'loss', 'content': 0.04297754913568497, 'timestamp': '2025-10-01 04:40:31.950460', 'step': 16847, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:32.004640', 'step': 16847, 'epoch': 3} {'type': 'loss', 'content': 0.10620580613613129, 'timestamp': '2025-10-01 04:40:32.010775', 'step': 16848, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:32.063821', 'step': 16848, 'epoch': 3} {'type': 'loss', 'content': 0.06584487855434418, 'timestamp': '2025-10-01 04:40:32.066209', 'step': 16849, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:32.120284', 'step': 16849, 'epoch': 3} {'type': 'loss', 'content': 0.07500612735748291, 'timestamp': '2025-10-01 04:40:32.123144', 'step': 16850, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:40:32.177456', 'step': 16850, 'epoch': 3} {'type': 'loss', 'content': 0.10194826126098633, 'timestamp': '2025-10-01 04:40:32.190317', 'step': 16851, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:32.243762', 'step': 16851, 'epoch': 3} {'type': 'loss', 'content': 0.08663108199834824, 'timestamp': '2025-10-01 04:40:32.250049', 'step': 16852, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:32.303448', 'step': 16852, 'epoch': 3} {'type': 'loss', 'content': 0.12912364304065704, 'timestamp': '2025-10-01 04:40:32.305776', 'step': 16853, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:32.359724', 'step': 16853, 'epoch': 3} {'type': 'loss', 'content': 0.08344043046236038, 'timestamp': '2025-10-01 04:40:32.362671', 'step': 16854, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:32.416774', 'step': 16854, 'epoch': 3} {'type': 'loss', 'content': 0.09687855839729309, 'timestamp': '2025-10-01 04:40:32.419059', 'step': 16855, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:32.472436', 'step': 16855, 'epoch': 3} {'type': 'loss', 'content': 0.1513746827840805, 'timestamp': '2025-10-01 04:40:32.478294', 'step': 16856, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:32.531268', 'step': 16856, 'epoch': 3} {'type': 'loss', 'content': 0.05259310081601143, 'timestamp': '2025-10-01 04:40:32.533863', 'step': 16857, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:32.587120', 'step': 16857, 'epoch': 3} {'type': 'loss', 'content': 0.128576397895813, 'timestamp': '2025-10-01 04:40:32.589994', 'step': 16858, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:32.643153', 'step': 16858, 'epoch': 3} {'type': 'loss', 'content': 0.14824049174785614, 'timestamp': '2025-10-01 04:40:32.646643', 'step': 16859, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:32.700285', 'step': 16859, 'epoch': 3} {'type': 'loss', 'content': 0.13912636041641235, 'timestamp': '2025-10-01 04:40:32.707246', 'step': 16860, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:32.760311', 'step': 16860, 'epoch': 3} {'type': 'loss', 'content': 0.14189372956752777, 'timestamp': '2025-10-01 04:40:32.762677', 'step': 16861, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:32.816471', 'step': 16861, 'epoch': 3} {'type': 'loss', 'content': 0.09311120957136154, 'timestamp': '2025-10-01 04:40:32.818776', 'step': 16862, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:32.873092', 'step': 16862, 'epoch': 3} {'type': 'loss', 'content': 0.07973220199346542, 'timestamp': '2025-10-01 04:40:32.875558', 'step': 16863, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:32.929748', 'step': 16863, 'epoch': 3} {'type': 'loss', 'content': 0.20056375861167908, 'timestamp': '2025-10-01 04:40:32.935638', 'step': 16864, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:32.988437', 'step': 16864, 'epoch': 3} {'type': 'loss', 'content': 0.17079348862171173, 'timestamp': '2025-10-01 04:40:32.990586', 'step': 16865, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:33.058887', 'step': 16865, 'epoch': 3} {'type': 'loss', 'content': 0.09078110754489899, 'timestamp': '2025-10-01 04:40:33.061265', 'step': 16866, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:33.114427', 'step': 16866, 'epoch': 3} {'type': 'loss', 'content': 0.1216944009065628, 'timestamp': '2025-10-01 04:40:33.116786', 'step': 16867, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:33.170583', 'step': 16867, 'epoch': 3} {'type': 'loss', 'content': 0.11160554736852646, 'timestamp': '2025-10-01 04:40:33.176414', 'step': 16868, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:33.229978', 'step': 16868, 'epoch': 3} {'type': 'loss', 'content': 0.22206754982471466, 'timestamp': '2025-10-01 04:40:33.232168', 'step': 16869, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:33.285458', 'step': 16869, 'epoch': 3} {'type': 'loss', 'content': 0.06431003659963608, 'timestamp': '2025-10-01 04:40:33.287625', 'step': 16870, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:33.341072', 'step': 16870, 'epoch': 3} {'type': 'loss', 'content': 0.12245799601078033, 'timestamp': '2025-10-01 04:40:33.343272', 'step': 16871, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:33.396633', 'step': 16871, 'epoch': 3} {'type': 'loss', 'content': 0.0735727921128273, 'timestamp': '2025-10-01 04:40:33.402454', 'step': 16872, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:40:33.461688', 'step': 16872, 'epoch': 3} {'type': 'loss', 'content': 0.07156296819448471, 'timestamp': '2025-10-01 04:40:33.464042', 'step': 16873, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:33.517332', 'step': 16873, 'epoch': 3} {'type': 'loss', 'content': 0.05044378340244293, 'timestamp': '2025-10-01 04:40:33.519547', 'step': 16874, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:33.573708', 'step': 16874, 'epoch': 3} {'type': 'loss', 'content': 0.08004038035869598, 'timestamp': '2025-10-01 04:40:33.575977', 'step': 16875, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:33.634306', 'step': 16875, 'epoch': 3} {'type': 'loss', 'content': 0.06872367858886719, 'timestamp': '2025-10-01 04:40:33.640492', 'step': 16876, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:33.693892', 'step': 16876, 'epoch': 3} {'type': 'loss', 'content': 0.11186095327138901, 'timestamp': '2025-10-01 04:40:33.696296', 'step': 16877, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:33.750968', 'step': 16877, 'epoch': 3} {'type': 'loss', 'content': 0.05086600035429001, 'timestamp': '2025-10-01 04:40:33.754583', 'step': 16878, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:33.815043', 'step': 16878, 'epoch': 3} {'type': 'loss', 'content': 0.08854147046804428, 'timestamp': '2025-10-01 04:40:33.817337', 'step': 16879, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:33.870761', 'step': 16879, 'epoch': 3} {'type': 'loss', 'content': 0.11452171206474304, 'timestamp': '2025-10-01 04:40:33.876718', 'step': 16880, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:33.930390', 'step': 16880, 'epoch': 3} {'type': 'loss', 'content': 0.052300237119197845, 'timestamp': '2025-10-01 04:40:33.933503', 'step': 16881, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:33.989496', 'step': 16881, 'epoch': 3} {'type': 'loss', 'content': 0.04727739095687866, 'timestamp': '2025-10-01 04:40:33.991766', 'step': 16882, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:34.045391', 'step': 16882, 'epoch': 3} {'type': 'loss', 'content': 0.03878644481301308, 'timestamp': '2025-10-01 04:40:34.048076', 'step': 16883, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:34.102081', 'step': 16883, 'epoch': 3} {'type': 'loss', 'content': 0.08437000960111618, 'timestamp': '2025-10-01 04:40:34.108082', 'step': 16884, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:34.160874', 'step': 16884, 'epoch': 3} {'type': 'loss', 'content': 0.05514891445636749, 'timestamp': '2025-10-01 04:40:34.163108', 'step': 16885, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:34.216124', 'step': 16885, 'epoch': 3} {'type': 'loss', 'content': 0.1155562624335289, 'timestamp': '2025-10-01 04:40:34.218528', 'step': 16886, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:34.271867', 'step': 16886, 'epoch': 3} {'type': 'loss', 'content': 0.19625744223594666, 'timestamp': '2025-10-01 04:40:34.283429', 'step': 16887, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:34.337154', 'step': 16887, 'epoch': 3} {'type': 'loss', 'content': 0.10681264102458954, 'timestamp': '2025-10-01 04:40:34.342929', 'step': 16888, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:40:34.395921', 'step': 16888, 'epoch': 3} {'type': 'loss', 'content': 0.08459698408842087, 'timestamp': '2025-10-01 04:40:34.398271', 'step': 16889, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:34.451426', 'step': 16889, 'epoch': 3} {'type': 'loss', 'content': 0.1366874873638153, 'timestamp': '2025-10-01 04:40:34.454344', 'step': 16890, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:34.511880', 'step': 16890, 'epoch': 3} {'type': 'loss', 'content': 0.0934569388628006, 'timestamp': '2025-10-01 04:40:34.514187', 'step': 16891, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:34.568873', 'step': 16891, 'epoch': 3} {'type': 'loss', 'content': 0.08884874731302261, 'timestamp': '2025-10-01 04:40:34.574236', 'step': 16892, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:34.627042', 'step': 16892, 'epoch': 3} {'type': 'loss', 'content': 0.07097159326076508, 'timestamp': '2025-10-01 04:40:34.629235', 'step': 16893, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:34.683524', 'step': 16893, 'epoch': 3} {'type': 'loss', 'content': 0.07496048510074615, 'timestamp': '2025-10-01 04:40:34.686430', 'step': 16894, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:34.741591', 'step': 16894, 'epoch': 3} {'type': 'loss', 'content': 0.08532289415597916, 'timestamp': '2025-10-01 04:40:34.744200', 'step': 16895, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:40:34.797642', 'step': 16895, 'epoch': 3} {'type': 'loss', 'content': 0.0556383915245533, 'timestamp': '2025-10-01 04:40:34.803607', 'step': 16896, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:34.856036', 'step': 16896, 'epoch': 3} {'type': 'loss', 'content': 0.0679548978805542, 'timestamp': '2025-10-01 04:40:34.859007', 'step': 16897, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:34.912229', 'step': 16897, 'epoch': 3} {'type': 'loss', 'content': 0.07856155931949615, 'timestamp': '2025-10-01 04:40:34.921372', 'step': 16898, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:34.975067', 'step': 16898, 'epoch': 3} {'type': 'loss', 'content': 0.11660338938236237, 'timestamp': '2025-10-01 04:40:34.977420', 'step': 16899, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:35.032607', 'step': 16899, 'epoch': 3} {'type': 'loss', 'content': 0.11097747832536697, 'timestamp': '2025-10-01 04:40:35.039088', 'step': 16900, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:35.097989', 'step': 16900, 'epoch': 3} {'type': 'loss', 'content': 0.09120898693799973, 'timestamp': '2025-10-01 04:40:35.100588', 'step': 16901, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:35.155797', 'step': 16901, 'epoch': 3} {'type': 'loss', 'content': 0.208154559135437, 'timestamp': '2025-10-01 04:40:35.158258', 'step': 16902, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:35.217864', 'step': 16902, 'epoch': 3} {'type': 'loss', 'content': 0.08660487830638885, 'timestamp': '2025-10-01 04:40:35.220155', 'step': 16903, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:35.273670', 'step': 16903, 'epoch': 3} {'type': 'loss', 'content': 0.07675907015800476, 'timestamp': '2025-10-01 04:40:35.280081', 'step': 16904, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:35.334255', 'step': 16904, 'epoch': 3} {'type': 'loss', 'content': 0.06272856891155243, 'timestamp': '2025-10-01 04:40:35.336581', 'step': 16905, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:35.390881', 'step': 16905, 'epoch': 3} {'type': 'loss', 'content': 0.04892620071768761, 'timestamp': '2025-10-01 04:40:35.393430', 'step': 16906, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:35.447041', 'step': 16906, 'epoch': 3} {'type': 'loss', 'content': 0.0531039759516716, 'timestamp': '2025-10-01 04:40:35.454419', 'step': 16907, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:35.515192', 'step': 16907, 'epoch': 3} {'type': 'loss', 'content': 0.07309943437576294, 'timestamp': '2025-10-01 04:40:35.521179', 'step': 16908, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:35.574321', 'step': 16908, 'epoch': 3} {'type': 'loss', 'content': 0.12700603902339935, 'timestamp': '2025-10-01 04:40:35.576589', 'step': 16909, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:35.633201', 'step': 16909, 'epoch': 3} {'type': 'loss', 'content': 0.048739612102508545, 'timestamp': '2025-10-01 04:40:35.635497', 'step': 16910, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:40:35.688805', 'step': 16910, 'epoch': 3} {'type': 'loss', 'content': 0.07914673537015915, 'timestamp': '2025-10-01 04:40:35.691072', 'step': 16911, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:35.744745', 'step': 16911, 'epoch': 3} {'type': 'loss', 'content': 0.04215746372938156, 'timestamp': '2025-10-01 04:40:35.750628', 'step': 16912, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:35.803245', 'step': 16912, 'epoch': 3} {'type': 'loss', 'content': 0.021343113854527473, 'timestamp': '2025-10-01 04:40:35.805396', 'step': 16913, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:35.858208', 'step': 16913, 'epoch': 3} {'type': 'loss', 'content': 0.08441733568906784, 'timestamp': '2025-10-01 04:40:35.860333', 'step': 16914, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:35.913977', 'step': 16914, 'epoch': 3} {'type': 'loss', 'content': 0.1245385929942131, 'timestamp': '2025-10-01 04:40:35.916293', 'step': 16915, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:35.969285', 'step': 16915, 'epoch': 3} {'type': 'loss', 'content': 0.11047084629535675, 'timestamp': '2025-10-01 04:40:35.975920', 'step': 16916, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:40:36.029217', 'step': 16916, 'epoch': 3} {'type': 'loss', 'content': 0.14185556769371033, 'timestamp': '2025-10-01 04:40:36.032038', 'step': 16917, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:36.086602', 'step': 16917, 'epoch': 3} {'type': 'loss', 'content': 0.08238878101110458, 'timestamp': '2025-10-01 04:40:36.089160', 'step': 16918, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:36.143529', 'step': 16918, 'epoch': 3} {'type': 'loss', 'content': 0.07823511213064194, 'timestamp': '2025-10-01 04:40:36.146298', 'step': 16919, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:36.202788', 'step': 16919, 'epoch': 3} {'type': 'loss', 'content': 0.07294788211584091, 'timestamp': '2025-10-01 04:40:36.209218', 'step': 16920, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:36.271921', 'step': 16920, 'epoch': 3} {'type': 'loss', 'content': 0.032453637570142746, 'timestamp': '2025-10-01 04:40:36.275212', 'step': 16921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:36.329572', 'step': 16921, 'epoch': 3} {'type': 'loss', 'content': 0.11145375669002533, 'timestamp': '2025-10-01 04:40:36.332340', 'step': 16922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:36.386715', 'step': 16922, 'epoch': 3} {'type': 'loss', 'content': 0.11506500840187073, 'timestamp': '2025-10-01 04:40:36.390046', 'step': 16923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:40:36.450867', 'step': 16923, 'epoch': 3} {'type': 'loss', 'content': 0.13870695233345032, 'timestamp': '2025-10-01 04:40:36.456983', 'step': 16924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:36.511154', 'step': 16924, 'epoch': 3} {'type': 'loss', 'content': 0.0371810607612133, 'timestamp': '2025-10-01 04:40:36.513497', 'step': 16925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:36.569215', 'step': 16925, 'epoch': 3} {'type': 'loss', 'content': 0.05200731009244919, 'timestamp': '2025-10-01 04:40:36.573698', 'step': 16926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:40:36.627717', 'step': 16926, 'epoch': 3} {'type': 'loss', 'content': 0.11366161704063416, 'timestamp': '2025-10-01 04:40:36.630996', 'step': 16927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:36.685206', 'step': 16927, 'epoch': 3} {'type': 'loss', 'content': 0.11943242698907852, 'timestamp': '2025-10-01 04:40:36.691295', 'step': 16928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:36.745212', 'step': 16928, 'epoch': 3} {'type': 'loss', 'content': 0.06995803862810135, 'timestamp': '2025-10-01 04:40:36.747497', 'step': 16929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:36.802883', 'step': 16929, 'epoch': 3} {'type': 'loss', 'content': 0.143285870552063, 'timestamp': '2025-10-01 04:40:36.805262', 'step': 16930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:36.870437', 'step': 16930, 'epoch': 3} {'type': 'loss', 'content': 0.08188245445489883, 'timestamp': '2025-10-01 04:40:36.874254', 'step': 16931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:36.929316', 'step': 16931, 'epoch': 3} {'type': 'loss', 'content': 0.10017834603786469, 'timestamp': '2025-10-01 04:40:36.935305', 'step': 16932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:40:36.994737', 'step': 16932, 'epoch': 3} {'type': 'loss', 'content': 0.17882421612739563, 'timestamp': '2025-10-01 04:40:36.997559', 'step': 16933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:37.051460', 'step': 16933, 'epoch': 3} {'type': 'loss', 'content': 0.10093801468610764, 'timestamp': '2025-10-01 04:40:37.054220', 'step': 16934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:37.110893', 'step': 16934, 'epoch': 3} {'type': 'loss', 'content': 0.060836222022771835, 'timestamp': '2025-10-01 04:40:37.113518', 'step': 16935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:37.167285', 'step': 16935, 'epoch': 3} {'type': 'loss', 'content': 0.02423388697206974, 'timestamp': '2025-10-01 04:40:37.173807', 'step': 16936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:37.234118', 'step': 16936, 'epoch': 3} {'type': 'loss', 'content': 0.08257181942462921, 'timestamp': '2025-10-01 04:40:37.236678', 'step': 16937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:40:37.291567', 'step': 16937, 'epoch': 3} {'type': 'loss', 'content': 0.14257755875587463, 'timestamp': '2025-10-01 04:40:37.294413', 'step': 16938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:37.348691', 'step': 16938, 'epoch': 3} {'type': 'loss', 'content': 0.10017023235559464, 'timestamp': '2025-10-01 04:40:37.351239', 'step': 16939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:37.413369', 'step': 16939, 'epoch': 3} {'type': 'loss', 'content': 0.18136601150035858, 'timestamp': '2025-10-01 04:40:37.420239', 'step': 16940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:37.474625', 'step': 16940, 'epoch': 3} {'type': 'loss', 'content': 0.16267895698547363, 'timestamp': '2025-10-01 04:40:37.476991', 'step': 16941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:37.530295', 'step': 16941, 'epoch': 3} {'type': 'loss', 'content': 0.10280882567167282, 'timestamp': '2025-10-01 04:40:37.533499', 'step': 16942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:37.588629', 'step': 16942, 'epoch': 3} {'type': 'loss', 'content': 0.13877470791339874, 'timestamp': '2025-10-01 04:40:37.591710', 'step': 16943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:37.647281', 'step': 16943, 'epoch': 3} {'type': 'loss', 'content': 0.07133903354406357, 'timestamp': '2025-10-01 04:40:37.653541', 'step': 16944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:40:37.708921', 'step': 16944, 'epoch': 3} {'type': 'loss', 'content': 0.1301378458738327, 'timestamp': '2025-10-01 04:40:37.712994', 'step': 16945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:40:37.766997', 'step': 16945, 'epoch': 3} {'type': 'loss', 'content': 0.06202627718448639, 'timestamp': '2025-10-01 04:40:37.769221', 'step': 16946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:37.824014', 'step': 16946, 'epoch': 3} {'type': 'loss', 'content': 0.08923159539699554, 'timestamp': '2025-10-01 04:40:37.826365', 'step': 16947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:37.880804', 'step': 16947, 'epoch': 3} {'type': 'loss', 'content': 0.08662880212068558, 'timestamp': '2025-10-01 04:40:37.886934', 'step': 16948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:37.941063', 'step': 16948, 'epoch': 3} {'type': 'loss', 'content': 0.0997719094157219, 'timestamp': '2025-10-01 04:40:37.943447', 'step': 16949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:37.998192', 'step': 16949, 'epoch': 3} {'type': 'loss', 'content': 0.07089513540267944, 'timestamp': '2025-10-01 04:40:38.001372', 'step': 16950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:38.056526', 'step': 16950, 'epoch': 3} {'type': 'loss', 'content': 0.009356463328003883, 'timestamp': '2025-10-01 04:40:38.058805', 'step': 16951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:38.113571', 'step': 16951, 'epoch': 3} {'type': 'loss', 'content': 0.06767076998949051, 'timestamp': '2025-10-01 04:40:38.120124', 'step': 16952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:38.176311', 'step': 16952, 'epoch': 3} {'type': 'loss', 'content': 0.15707933902740479, 'timestamp': '2025-10-01 04:40:38.179361', 'step': 16953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:38.233695', 'step': 16953, 'epoch': 3} {'type': 'loss', 'content': 0.08910754323005676, 'timestamp': '2025-10-01 04:40:38.236673', 'step': 16954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:38.291648', 'step': 16954, 'epoch': 3} {'type': 'loss', 'content': 0.05710729956626892, 'timestamp': '2025-10-01 04:40:38.293947', 'step': 16955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:38.348929', 'step': 16955, 'epoch': 3} {'type': 'loss', 'content': 0.1415509581565857, 'timestamp': '2025-10-01 04:40:38.355433', 'step': 16956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:38.409702', 'step': 16956, 'epoch': 3} {'type': 'loss', 'content': 0.07252760231494904, 'timestamp': '2025-10-01 04:40:38.411967', 'step': 16957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:38.465417', 'step': 16957, 'epoch': 3} {'type': 'loss', 'content': 0.11118382960557938, 'timestamp': '2025-10-01 04:40:38.467788', 'step': 16958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:38.521486', 'step': 16958, 'epoch': 3} {'type': 'loss', 'content': 0.16132168471813202, 'timestamp': '2025-10-01 04:40:38.523876', 'step': 16959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:38.578008', 'step': 16959, 'epoch': 3} {'type': 'loss', 'content': 0.06239042431116104, 'timestamp': '2025-10-01 04:40:38.584107', 'step': 16960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:38.637117', 'step': 16960, 'epoch': 3} {'type': 'loss', 'content': 0.15606540441513062, 'timestamp': '2025-10-01 04:40:38.640091', 'step': 16961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:38.695911', 'step': 16961, 'epoch': 3} {'type': 'loss', 'content': 0.09453122317790985, 'timestamp': '2025-10-01 04:40:38.698401', 'step': 16962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:38.752303', 'step': 16962, 'epoch': 3} {'type': 'loss', 'content': 0.10565491020679474, 'timestamp': '2025-10-01 04:40:38.754935', 'step': 16963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:38.808622', 'step': 16963, 'epoch': 3} {'type': 'loss', 'content': 0.04936536028981209, 'timestamp': '2025-10-01 04:40:38.814859', 'step': 16964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:38.873825', 'step': 16964, 'epoch': 3} {'type': 'loss', 'content': 0.0918353796005249, 'timestamp': '2025-10-01 04:40:38.876644', 'step': 16965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:38.931234', 'step': 16965, 'epoch': 3} {'type': 'loss', 'content': 0.03836621716618538, 'timestamp': '2025-10-01 04:40:38.933642', 'step': 16966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:38.988374', 'step': 16966, 'epoch': 3} {'type': 'loss', 'content': 0.09693656861782074, 'timestamp': '2025-10-01 04:40:38.990798', 'step': 16967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:39.045344', 'step': 16967, 'epoch': 3} {'type': 'loss', 'content': 0.10425374656915665, 'timestamp': '2025-10-01 04:40:39.051501', 'step': 16968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:39.104848', 'step': 16968, 'epoch': 3} {'type': 'loss', 'content': 0.05013106018304825, 'timestamp': '2025-10-01 04:40:39.107232', 'step': 16969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:39.162228', 'step': 16969, 'epoch': 3} {'type': 'loss', 'content': 0.09230585396289825, 'timestamp': '2025-10-01 04:40:39.164475', 'step': 16970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:39.225969', 'step': 16970, 'epoch': 3} {'type': 'loss', 'content': 0.1072268933057785, 'timestamp': '2025-10-01 04:40:39.228204', 'step': 16971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:39.286590', 'step': 16971, 'epoch': 3} {'type': 'loss', 'content': 0.11714304983615875, 'timestamp': '2025-10-01 04:40:39.292738', 'step': 16972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:39.346292', 'step': 16972, 'epoch': 3} {'type': 'loss', 'content': 0.18358772993087769, 'timestamp': '2025-10-01 04:40:39.348469', 'step': 16973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:39.403201', 'step': 16973, 'epoch': 3} {'type': 'loss', 'content': 0.06919558346271515, 'timestamp': '2025-10-01 04:40:39.405562', 'step': 16974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:39.461949', 'step': 16974, 'epoch': 3} {'type': 'loss', 'content': 0.06134554371237755, 'timestamp': '2025-10-01 04:40:39.464372', 'step': 16975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:39.520765', 'step': 16975, 'epoch': 3} {'type': 'loss', 'content': 0.059057801961898804, 'timestamp': '2025-10-01 04:40:39.527429', 'step': 16976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:39.582225', 'step': 16976, 'epoch': 3} {'type': 'loss', 'content': 0.05267918109893799, 'timestamp': '2025-10-01 04:40:39.584790', 'step': 16977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:39.642640', 'step': 16977, 'epoch': 3} {'type': 'loss', 'content': 0.1720968335866928, 'timestamp': '2025-10-01 04:40:39.644988', 'step': 16978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:39.704262', 'step': 16978, 'epoch': 3} {'type': 'loss', 'content': 0.08218953013420105, 'timestamp': '2025-10-01 04:40:39.706604', 'step': 16979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:39.761483', 'step': 16979, 'epoch': 3} {'type': 'loss', 'content': 0.08005693554878235, 'timestamp': '2025-10-01 04:40:39.768918', 'step': 16980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:39.823683', 'step': 16980, 'epoch': 3} {'type': 'loss', 'content': 0.0656300038099289, 'timestamp': '2025-10-01 04:40:39.834963', 'step': 16981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:39.892636', 'step': 16981, 'epoch': 3} {'type': 'loss', 'content': 0.06646609306335449, 'timestamp': '2025-10-01 04:40:39.895150', 'step': 16982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:39.953217', 'step': 16982, 'epoch': 3} {'type': 'loss', 'content': 0.09072432667016983, 'timestamp': '2025-10-01 04:40:39.955544', 'step': 16983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:40.013832', 'step': 16983, 'epoch': 3} {'type': 'loss', 'content': 0.13262929022312164, 'timestamp': '2025-10-01 04:40:40.020665', 'step': 16984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:40:40.078420', 'step': 16984, 'epoch': 3} {'type': 'loss', 'content': 0.11322624981403351, 'timestamp': '2025-10-01 04:40:40.080805', 'step': 16985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:40.142320', 'step': 16985, 'epoch': 3} {'type': 'loss', 'content': 0.05831920728087425, 'timestamp': '2025-10-01 04:40:40.145923', 'step': 16986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:40.204221', 'step': 16986, 'epoch': 3} {'type': 'loss', 'content': 0.136398047208786, 'timestamp': '2025-10-01 04:40:40.206550', 'step': 16987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:40.262834', 'step': 16987, 'epoch': 3} {'type': 'loss', 'content': 0.10988009721040726, 'timestamp': '2025-10-01 04:40:40.269299', 'step': 16988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:40.322594', 'step': 16988, 'epoch': 3} {'type': 'loss', 'content': 0.08547680824995041, 'timestamp': '2025-10-01 04:40:40.324881', 'step': 16989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:40.378933', 'step': 16989, 'epoch': 3} {'type': 'loss', 'content': 0.0780864879488945, 'timestamp': '2025-10-01 04:40:40.381998', 'step': 16990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:40.435483', 'step': 16990, 'epoch': 3} {'type': 'loss', 'content': 0.1201995462179184, 'timestamp': '2025-10-01 04:40:40.437894', 'step': 16991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:40.491380', 'step': 16991, 'epoch': 3} {'type': 'loss', 'content': 0.12067888677120209, 'timestamp': '2025-10-01 04:40:40.497601', 'step': 16992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:40.550542', 'step': 16992, 'epoch': 3} {'type': 'loss', 'content': 0.10253765434026718, 'timestamp': '2025-10-01 04:40:40.552766', 'step': 16993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:40.606120', 'step': 16993, 'epoch': 3} {'type': 'loss', 'content': 0.07689155638217926, 'timestamp': '2025-10-01 04:40:40.608349', 'step': 16994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:40.662593', 'step': 16994, 'epoch': 3} {'type': 'loss', 'content': 0.07163006067276001, 'timestamp': '2025-10-01 04:40:40.665332', 'step': 16995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:40.719168', 'step': 16995, 'epoch': 3} {'type': 'loss', 'content': 0.08327004313468933, 'timestamp': '2025-10-01 04:40:40.725206', 'step': 16996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:40.780619', 'step': 16996, 'epoch': 3} {'type': 'loss', 'content': 0.1313881278038025, 'timestamp': '2025-10-01 04:40:40.782967', 'step': 16997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:40.836575', 'step': 16997, 'epoch': 3} {'type': 'loss', 'content': 0.1344861090183258, 'timestamp': '2025-10-01 04:40:40.844626', 'step': 16998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:40.898902', 'step': 16998, 'epoch': 3} {'type': 'loss', 'content': 0.09981539845466614, 'timestamp': '2025-10-01 04:40:40.901302', 'step': 16999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:40.960117', 'step': 16999, 'epoch': 3} {'type': 'loss', 'content': 0.0941116064786911, 'timestamp': '2025-10-01 04:40:40.966040', 'step': 17000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 17000', 'timestamp': '2025-10-01 04:40:41.333013', 'step': 17000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:41.391444', 'step': 17000, 'epoch': 3} {'type': 'loss', 'content': 0.08135254681110382, 'timestamp': '2025-10-01 04:40:41.393913', 'step': 17001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:41.448150', 'step': 17001, 'epoch': 3} {'type': 'loss', 'content': 0.09604163467884064, 'timestamp': '2025-10-01 04:40:41.450380', 'step': 17002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:41.504703', 'step': 17002, 'epoch': 3} {'type': 'loss', 'content': 0.1400902420282364, 'timestamp': '2025-10-01 04:40:41.506979', 'step': 17003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:41.560532', 'step': 17003, 'epoch': 3} {'type': 'loss', 'content': 0.07624959200620651, 'timestamp': '2025-10-01 04:40:41.567612', 'step': 17004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:41.621119', 'step': 17004, 'epoch': 3} {'type': 'loss', 'content': 0.05369631201028824, 'timestamp': '2025-10-01 04:40:41.623458', 'step': 17005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:41.677746', 'step': 17005, 'epoch': 3} {'type': 'loss', 'content': 0.09269829094409943, 'timestamp': '2025-10-01 04:40:41.680160', 'step': 17006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:41.732916', 'step': 17006, 'epoch': 3} {'type': 'loss', 'content': 0.09239161014556885, 'timestamp': '2025-10-01 04:40:41.735189', 'step': 17007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:41.789155', 'step': 17007, 'epoch': 3} {'type': 'loss', 'content': 0.07217004150152206, 'timestamp': '2025-10-01 04:40:41.795163', 'step': 17008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:41.849626', 'step': 17008, 'epoch': 3} {'type': 'loss', 'content': 0.06786151230335236, 'timestamp': '2025-10-01 04:40:41.851813', 'step': 17009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:41.906537', 'step': 17009, 'epoch': 3} {'type': 'loss', 'content': 0.035602767020463943, 'timestamp': '2025-10-01 04:40:41.919327', 'step': 17010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:41.972437', 'step': 17010, 'epoch': 3} {'type': 'loss', 'content': 0.11326054483652115, 'timestamp': '2025-10-01 04:40:41.974670', 'step': 17011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:42.027433', 'step': 17011, 'epoch': 3} {'type': 'loss', 'content': 0.1466371715068817, 'timestamp': '2025-10-01 04:40:42.033419', 'step': 17012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:42.087388', 'step': 17012, 'epoch': 3} {'type': 'loss', 'content': 0.09795111417770386, 'timestamp': '2025-10-01 04:40:42.089639', 'step': 17013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:42.143820', 'step': 17013, 'epoch': 3} {'type': 'loss', 'content': 0.14625588059425354, 'timestamp': '2025-10-01 04:40:42.146094', 'step': 17014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:42.199493', 'step': 17014, 'epoch': 3} {'type': 'loss', 'content': 0.07323269546031952, 'timestamp': '2025-10-01 04:40:42.201818', 'step': 17015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:42.255862', 'step': 17015, 'epoch': 3} {'type': 'loss', 'content': 0.04445284232497215, 'timestamp': '2025-10-01 04:40:42.261788', 'step': 17016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:42.314304', 'step': 17016, 'epoch': 3} {'type': 'loss', 'content': 0.1276715248823166, 'timestamp': '2025-10-01 04:40:42.316485', 'step': 17017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:42.369519', 'step': 17017, 'epoch': 3} {'type': 'loss', 'content': 0.165688157081604, 'timestamp': '2025-10-01 04:40:42.371848', 'step': 17018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:42.425489', 'step': 17018, 'epoch': 3} {'type': 'loss', 'content': 0.12633571028709412, 'timestamp': '2025-10-01 04:40:42.427767', 'step': 17019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:42.482462', 'step': 17019, 'epoch': 3} {'type': 'loss', 'content': 0.09247156232595444, 'timestamp': '2025-10-01 04:40:42.488618', 'step': 17020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:42.542342', 'step': 17020, 'epoch': 3} {'type': 'loss', 'content': 0.11049111932516098, 'timestamp': '2025-10-01 04:40:42.545025', 'step': 17021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:42.598924', 'step': 17021, 'epoch': 3} {'type': 'loss', 'content': 0.14348916709423065, 'timestamp': '2025-10-01 04:40:42.601192', 'step': 17022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:42.655258', 'step': 17022, 'epoch': 3} {'type': 'loss', 'content': 0.08455102890729904, 'timestamp': '2025-10-01 04:40:42.657775', 'step': 17023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:42.713354', 'step': 17023, 'epoch': 3} {'type': 'loss', 'content': 0.11935703456401825, 'timestamp': '2025-10-01 04:40:42.719635', 'step': 17024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:42.772640', 'step': 17024, 'epoch': 3} {'type': 'loss', 'content': 0.11447510123252869, 'timestamp': '2025-10-01 04:40:42.775021', 'step': 17025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:42.830930', 'step': 17025, 'epoch': 3} {'type': 'loss', 'content': 0.15487729012966156, 'timestamp': '2025-10-01 04:40:42.833292', 'step': 17026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:42.887448', 'step': 17026, 'epoch': 3} {'type': 'loss', 'content': 0.11079558730125427, 'timestamp': '2025-10-01 04:40:42.900026', 'step': 17027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:40:42.953299', 'step': 17027, 'epoch': 3} {'type': 'loss', 'content': 0.06671823561191559, 'timestamp': '2025-10-01 04:40:42.959219', 'step': 17028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:43.011852', 'step': 17028, 'epoch': 3} {'type': 'loss', 'content': 0.08306171745061874, 'timestamp': '2025-10-01 04:40:43.014175', 'step': 17029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:43.077312', 'step': 17029, 'epoch': 3} {'type': 'loss', 'content': 0.06065354496240616, 'timestamp': '2025-10-01 04:40:43.079672', 'step': 17030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:43.133277', 'step': 17030, 'epoch': 3} {'type': 'loss', 'content': 0.16461452841758728, 'timestamp': '2025-10-01 04:40:43.135597', 'step': 17031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:43.188787', 'step': 17031, 'epoch': 3} {'type': 'loss', 'content': 0.09798812866210938, 'timestamp': '2025-10-01 04:40:43.194936', 'step': 17032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:43.247791', 'step': 17032, 'epoch': 3} {'type': 'loss', 'content': 0.09241573512554169, 'timestamp': '2025-10-01 04:40:43.250095', 'step': 17033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:43.303301', 'step': 17033, 'epoch': 3} {'type': 'loss', 'content': 0.12311609089374542, 'timestamp': '2025-10-01 04:40:43.305689', 'step': 17034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:40:43.359112', 'step': 17034, 'epoch': 3} {'type': 'loss', 'content': 0.12239035964012146, 'timestamp': '2025-10-01 04:40:43.361406', 'step': 17035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:43.415934', 'step': 17035, 'epoch': 3} {'type': 'loss', 'content': 0.12211378663778305, 'timestamp': '2025-10-01 04:40:43.421757', 'step': 17036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:43.475040', 'step': 17036, 'epoch': 3} {'type': 'loss', 'content': 0.1390710026025772, 'timestamp': '2025-10-01 04:40:43.477093', 'step': 17037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:43.529764', 'step': 17037, 'epoch': 3} {'type': 'loss', 'content': 0.09509270638227463, 'timestamp': '2025-10-01 04:40:43.532102', 'step': 17038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:43.585902', 'step': 17038, 'epoch': 3} {'type': 'loss', 'content': 0.05342933163046837, 'timestamp': '2025-10-01 04:40:43.588112', 'step': 17039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:43.641541', 'step': 17039, 'epoch': 3} {'type': 'loss', 'content': 0.10137657076120377, 'timestamp': '2025-10-01 04:40:43.647591', 'step': 17040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:43.701229', 'step': 17040, 'epoch': 3} {'type': 'loss', 'content': 0.07343626022338867, 'timestamp': '2025-10-01 04:40:43.704437', 'step': 17041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:40:43.758538', 'step': 17041, 'epoch': 3} {'type': 'loss', 'content': 0.09062211960554123, 'timestamp': '2025-10-01 04:40:43.762104', 'step': 17042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:43.819717', 'step': 17042, 'epoch': 3} {'type': 'loss', 'content': 0.09878819435834885, 'timestamp': '2025-10-01 04:40:43.822022', 'step': 17043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:43.875740', 'step': 17043, 'epoch': 3} {'type': 'loss', 'content': 0.057254038751125336, 'timestamp': '2025-10-01 04:40:43.902393', 'step': 17044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:43.973916', 'step': 17044, 'epoch': 3} {'type': 'loss', 'content': 0.08853163570165634, 'timestamp': '2025-10-01 04:40:43.976171', 'step': 17045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:44.030464', 'step': 17045, 'epoch': 3} {'type': 'loss', 'content': 0.09951718896627426, 'timestamp': '2025-10-01 04:40:44.032894', 'step': 17046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:44.086276', 'step': 17046, 'epoch': 3} {'type': 'loss', 'content': 0.07741334289312363, 'timestamp': '2025-10-01 04:40:44.088653', 'step': 17047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:44.143261', 'step': 17047, 'epoch': 3} {'type': 'loss', 'content': 0.18483027815818787, 'timestamp': '2025-10-01 04:40:44.149183', 'step': 17048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:44.203531', 'step': 17048, 'epoch': 3} {'type': 'loss', 'content': 0.0847298800945282, 'timestamp': '2025-10-01 04:40:44.207849', 'step': 17049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:44.266291', 'step': 17049, 'epoch': 3} {'type': 'loss', 'content': 0.051328908652067184, 'timestamp': '2025-10-01 04:40:44.268665', 'step': 17050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:44.323228', 'step': 17050, 'epoch': 3} {'type': 'loss', 'content': 0.08642571419477463, 'timestamp': '2025-10-01 04:40:44.325874', 'step': 17051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:44.379183', 'step': 17051, 'epoch': 3} {'type': 'loss', 'content': 0.1724882870912552, 'timestamp': '2025-10-01 04:40:44.389210', 'step': 17052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:44.441757', 'step': 17052, 'epoch': 3} {'type': 'loss', 'content': 0.03714410960674286, 'timestamp': '2025-10-01 04:40:44.444594', 'step': 17053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:44.498289', 'step': 17053, 'epoch': 3} {'type': 'loss', 'content': 0.049205534160137177, 'timestamp': '2025-10-01 04:40:44.500663', 'step': 17054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:44.554698', 'step': 17054, 'epoch': 3} {'type': 'loss', 'content': 0.2138151228427887, 'timestamp': '2025-10-01 04:40:44.557512', 'step': 17055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:44.617008', 'step': 17055, 'epoch': 3} {'type': 'loss', 'content': 0.016232840716838837, 'timestamp': '2025-10-01 04:40:44.623073', 'step': 17056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:44.678141', 'step': 17056, 'epoch': 3} {'type': 'loss', 'content': 0.10867225378751755, 'timestamp': '2025-10-01 04:40:44.681135', 'step': 17057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:44.742445', 'step': 17057, 'epoch': 3} {'type': 'loss', 'content': 0.20678642392158508, 'timestamp': '2025-10-01 04:40:44.744586', 'step': 17058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:44.803829', 'step': 17058, 'epoch': 3} {'type': 'loss', 'content': 0.060056403279304504, 'timestamp': '2025-10-01 04:40:44.807532', 'step': 17059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:44.862435', 'step': 17059, 'epoch': 3} {'type': 'loss', 'content': 0.06846728920936584, 'timestamp': '2025-10-01 04:40:44.868664', 'step': 17060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:44.923371', 'step': 17060, 'epoch': 3} {'type': 'loss', 'content': 0.09520696848630905, 'timestamp': '2025-10-01 04:40:44.926130', 'step': 17061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:44.983018', 'step': 17061, 'epoch': 3} {'type': 'loss', 'content': 0.07740285247564316, 'timestamp': '2025-10-01 04:40:44.985479', 'step': 17062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:45.041060', 'step': 17062, 'epoch': 3} {'type': 'loss', 'content': 0.07979237288236618, 'timestamp': '2025-10-01 04:40:45.043661', 'step': 17063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:45.098235', 'step': 17063, 'epoch': 3} {'type': 'loss', 'content': 0.09994160383939743, 'timestamp': '2025-10-01 04:40:45.104429', 'step': 17064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:45.158596', 'step': 17064, 'epoch': 3} {'type': 'loss', 'content': 0.08844994753599167, 'timestamp': '2025-10-01 04:40:45.161261', 'step': 17065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:45.217235', 'step': 17065, 'epoch': 3} {'type': 'loss', 'content': 0.0992845743894577, 'timestamp': '2025-10-01 04:40:45.220127', 'step': 17066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:45.275896', 'step': 17066, 'epoch': 3} {'type': 'loss', 'content': 0.06576070189476013, 'timestamp': '2025-10-01 04:40:45.278478', 'step': 17067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:45.333295', 'step': 17067, 'epoch': 3} {'type': 'loss', 'content': 0.0733528733253479, 'timestamp': '2025-10-01 04:40:45.339981', 'step': 17068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:45.395959', 'step': 17068, 'epoch': 3} {'type': 'loss', 'content': 0.09413160383701324, 'timestamp': '2025-10-01 04:40:45.399148', 'step': 17069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:45.454676', 'step': 17069, 'epoch': 3} {'type': 'loss', 'content': 0.0892624631524086, 'timestamp': '2025-10-01 04:40:45.457163', 'step': 17070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:45.511796', 'step': 17070, 'epoch': 3} {'type': 'loss', 'content': 0.10016486793756485, 'timestamp': '2025-10-01 04:40:45.514120', 'step': 17071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:45.568861', 'step': 17071, 'epoch': 3} {'type': 'loss', 'content': 0.11390037089586258, 'timestamp': '2025-10-01 04:40:45.575024', 'step': 17072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:40:45.629462', 'step': 17072, 'epoch': 3} {'type': 'loss', 'content': 0.062442440539598465, 'timestamp': '2025-10-01 04:40:45.631609', 'step': 17073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:45.685921', 'step': 17073, 'epoch': 3} {'type': 'loss', 'content': 0.06452268362045288, 'timestamp': '2025-10-01 04:40:45.688666', 'step': 17074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:45.742870', 'step': 17074, 'epoch': 3} {'type': 'loss', 'content': 0.08083290606737137, 'timestamp': '2025-10-01 04:40:45.745208', 'step': 17075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:45.798902', 'step': 17075, 'epoch': 3} {'type': 'loss', 'content': 0.11200462281703949, 'timestamp': '2025-10-01 04:40:45.805702', 'step': 17076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:45.863933', 'step': 17076, 'epoch': 3} {'type': 'loss', 'content': 0.10542695224285126, 'timestamp': '2025-10-01 04:40:45.866838', 'step': 17077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:45.921080', 'step': 17077, 'epoch': 3} {'type': 'loss', 'content': 0.018048735335469246, 'timestamp': '2025-10-01 04:40:45.923581', 'step': 17078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:45.977803', 'step': 17078, 'epoch': 3} {'type': 'loss', 'content': 0.16693897545337677, 'timestamp': '2025-10-01 04:40:45.980370', 'step': 17079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:46.034251', 'step': 17079, 'epoch': 3} {'type': 'loss', 'content': 0.09787146002054214, 'timestamp': '2025-10-01 04:40:46.040716', 'step': 17080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:46.094646', 'step': 17080, 'epoch': 3} {'type': 'loss', 'content': 0.09437886625528336, 'timestamp': '2025-10-01 04:40:46.097084', 'step': 17081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:46.152604', 'step': 17081, 'epoch': 3} {'type': 'loss', 'content': 0.1797051727771759, 'timestamp': '2025-10-01 04:40:46.155311', 'step': 17082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:46.209969', 'step': 17082, 'epoch': 3} {'type': 'loss', 'content': 0.11482368409633636, 'timestamp': '2025-10-01 04:40:46.212480', 'step': 17083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:46.266609', 'step': 17083, 'epoch': 3} {'type': 'loss', 'content': 0.11342756450176239, 'timestamp': '2025-10-01 04:40:46.272988', 'step': 17084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:46.327099', 'step': 17084, 'epoch': 3} {'type': 'loss', 'content': 0.1252252757549286, 'timestamp': '2025-10-01 04:40:46.329386', 'step': 17085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:46.383756', 'step': 17085, 'epoch': 3} {'type': 'loss', 'content': 0.10829974710941315, 'timestamp': '2025-10-01 04:40:46.386257', 'step': 17086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:46.440209', 'step': 17086, 'epoch': 3} {'type': 'loss', 'content': 0.08676309138536453, 'timestamp': '2025-10-01 04:40:46.443121', 'step': 17087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:46.498003', 'step': 17087, 'epoch': 3} {'type': 'loss', 'content': 0.19370070099830627, 'timestamp': '2025-10-01 04:40:46.504493', 'step': 17088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:46.557917', 'step': 17088, 'epoch': 3} {'type': 'loss', 'content': 0.13058677315711975, 'timestamp': '2025-10-01 04:40:46.560242', 'step': 17089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:46.614672', 'step': 17089, 'epoch': 3} {'type': 'loss', 'content': 0.1258815973997116, 'timestamp': '2025-10-01 04:40:46.617047', 'step': 17090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:46.670945', 'step': 17090, 'epoch': 3} {'type': 'loss', 'content': 0.0432664230465889, 'timestamp': '2025-10-01 04:40:46.673353', 'step': 17091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:46.729114', 'step': 17091, 'epoch': 3} {'type': 'loss', 'content': 0.20809431374073029, 'timestamp': '2025-10-01 04:40:46.735416', 'step': 17092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:46.788094', 'step': 17092, 'epoch': 3} {'type': 'loss', 'content': 0.1571682095527649, 'timestamp': '2025-10-01 04:40:46.790234', 'step': 17093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:46.843690', 'step': 17093, 'epoch': 3} {'type': 'loss', 'content': 0.13150711357593536, 'timestamp': '2025-10-01 04:40:46.845927', 'step': 17094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:46.899196', 'step': 17094, 'epoch': 3} {'type': 'loss', 'content': 0.13963453471660614, 'timestamp': '2025-10-01 04:40:46.902024', 'step': 17095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:40:46.955716', 'step': 17095, 'epoch': 3} {'type': 'loss', 'content': 0.11167196184396744, 'timestamp': '2025-10-01 04:40:46.961665', 'step': 17096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:47.014537', 'step': 17096, 'epoch': 3} {'type': 'loss', 'content': 0.1024022102355957, 'timestamp': '2025-10-01 04:40:47.016768', 'step': 17097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:47.070698', 'step': 17097, 'epoch': 3} {'type': 'loss', 'content': 0.08570202440023422, 'timestamp': '2025-10-01 04:40:47.072965', 'step': 17098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:47.127864', 'step': 17098, 'epoch': 3} {'type': 'loss', 'content': 0.0766996517777443, 'timestamp': '2025-10-01 04:40:47.130100', 'step': 17099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:47.183628', 'step': 17099, 'epoch': 3} {'type': 'loss', 'content': 0.11872924119234085, 'timestamp': '2025-10-01 04:40:47.189611', 'step': 17100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:47.242529', 'step': 17100, 'epoch': 3} {'type': 'loss', 'content': 0.07235056906938553, 'timestamp': '2025-10-01 04:40:47.244771', 'step': 17101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:47.298295', 'step': 17101, 'epoch': 3} {'type': 'loss', 'content': 0.20458972454071045, 'timestamp': '2025-10-01 04:40:47.300568', 'step': 17102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:47.354303', 'step': 17102, 'epoch': 3} {'type': 'loss', 'content': 0.048809900879859924, 'timestamp': '2025-10-01 04:40:47.356776', 'step': 17103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:47.411111', 'step': 17103, 'epoch': 3} {'type': 'loss', 'content': 0.09906218200922012, 'timestamp': '2025-10-01 04:40:47.417136', 'step': 17104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:47.470331', 'step': 17104, 'epoch': 3} {'type': 'loss', 'content': 0.20095065236091614, 'timestamp': '2025-10-01 04:40:47.472788', 'step': 17105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:47.527173', 'step': 17105, 'epoch': 3} {'type': 'loss', 'content': 0.12558364868164062, 'timestamp': '2025-10-01 04:40:47.529520', 'step': 17106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:47.584676', 'step': 17106, 'epoch': 3} {'type': 'loss', 'content': 0.10510705411434174, 'timestamp': '2025-10-01 04:40:47.586898', 'step': 17107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:47.640903', 'step': 17107, 'epoch': 3} {'type': 'loss', 'content': 0.22007490694522858, 'timestamp': '2025-10-01 04:40:47.647722', 'step': 17108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:47.701004', 'step': 17108, 'epoch': 3} {'type': 'loss', 'content': 0.09188872575759888, 'timestamp': '2025-10-01 04:40:47.703244', 'step': 17109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:47.756325', 'step': 17109, 'epoch': 3} {'type': 'loss', 'content': 0.07791649550199509, 'timestamp': '2025-10-01 04:40:47.758920', 'step': 17110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:47.811556', 'step': 17110, 'epoch': 3} {'type': 'loss', 'content': 0.06129199638962746, 'timestamp': '2025-10-01 04:40:47.817469', 'step': 17111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:47.870671', 'step': 17111, 'epoch': 3} {'type': 'loss', 'content': 0.06742320954799652, 'timestamp': '2025-10-01 04:40:47.877885', 'step': 17112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:47.932256', 'step': 17112, 'epoch': 3} {'type': 'loss', 'content': 0.07354460656642914, 'timestamp': '2025-10-01 04:40:47.934674', 'step': 17113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:47.988112', 'step': 17113, 'epoch': 3} {'type': 'loss', 'content': 0.08546843379735947, 'timestamp': '2025-10-01 04:40:47.990167', 'step': 17114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:48.045282', 'step': 17114, 'epoch': 3} {'type': 'loss', 'content': 0.12534701824188232, 'timestamp': '2025-10-01 04:40:48.048043', 'step': 17115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:48.103763', 'step': 17115, 'epoch': 3} {'type': 'loss', 'content': 0.11324729025363922, 'timestamp': '2025-10-01 04:40:48.109882', 'step': 17116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:48.163617', 'step': 17116, 'epoch': 3} {'type': 'loss', 'content': 0.06926294416189194, 'timestamp': '2025-10-01 04:40:48.165923', 'step': 17117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:48.221139', 'step': 17117, 'epoch': 3} {'type': 'loss', 'content': 0.04114735871553421, 'timestamp': '2025-10-01 04:40:48.223289', 'step': 17118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:48.276852', 'step': 17118, 'epoch': 3} {'type': 'loss', 'content': 0.08957455307245255, 'timestamp': '2025-10-01 04:40:48.279149', 'step': 17119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:48.335131', 'step': 17119, 'epoch': 3} {'type': 'loss', 'content': 0.08061210066080093, 'timestamp': '2025-10-01 04:40:48.341821', 'step': 17120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:48.395924', 'step': 17120, 'epoch': 3} {'type': 'loss', 'content': 0.03489473834633827, 'timestamp': '2025-10-01 04:40:48.398050', 'step': 17121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:40:48.451329', 'step': 17121, 'epoch': 3} {'type': 'loss', 'content': 0.09073197841644287, 'timestamp': '2025-10-01 04:40:48.453554', 'step': 17122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:48.507746', 'step': 17122, 'epoch': 3} {'type': 'loss', 'content': 0.03819822892546654, 'timestamp': '2025-10-01 04:40:48.510426', 'step': 17123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:48.564801', 'step': 17123, 'epoch': 3} {'type': 'loss', 'content': 0.0675557553768158, 'timestamp': '2025-10-01 04:40:48.571016', 'step': 17124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:48.624605', 'step': 17124, 'epoch': 3} {'type': 'loss', 'content': 0.10362173616886139, 'timestamp': '2025-10-01 04:40:48.626622', 'step': 17125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:48.679589', 'step': 17125, 'epoch': 3} {'type': 'loss', 'content': 0.1410120725631714, 'timestamp': '2025-10-01 04:40:48.681658', 'step': 17126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:48.736240', 'step': 17126, 'epoch': 3} {'type': 'loss', 'content': 0.11463599652051926, 'timestamp': '2025-10-01 04:40:48.738590', 'step': 17127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:40:48.793368', 'step': 17127, 'epoch': 3} {'type': 'loss', 'content': 0.1297694742679596, 'timestamp': '2025-10-01 04:40:48.799803', 'step': 17128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:48.853522', 'step': 17128, 'epoch': 3} {'type': 'loss', 'content': 0.08018063753843307, 'timestamp': '2025-10-01 04:40:48.855783', 'step': 17129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:48.910253', 'step': 17129, 'epoch': 3} {'type': 'loss', 'content': 0.17306876182556152, 'timestamp': '2025-10-01 04:40:48.913163', 'step': 17130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:48.967615', 'step': 17130, 'epoch': 3} {'type': 'loss', 'content': 0.15312372148036957, 'timestamp': '2025-10-01 04:40:48.970119', 'step': 17131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:49.023121', 'step': 17131, 'epoch': 3} {'type': 'loss', 'content': 0.17301048338413239, 'timestamp': '2025-10-01 04:40:49.037936', 'step': 17132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:49.090532', 'step': 17132, 'epoch': 3} {'type': 'loss', 'content': 0.047909945249557495, 'timestamp': '2025-10-01 04:40:49.092743', 'step': 17133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:49.146368', 'step': 17133, 'epoch': 3} {'type': 'loss', 'content': 0.10946778208017349, 'timestamp': '2025-10-01 04:40:49.148365', 'step': 17134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:49.201936', 'step': 17134, 'epoch': 3} {'type': 'loss', 'content': 0.08962661772966385, 'timestamp': '2025-10-01 04:40:49.204228', 'step': 17135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:49.257832', 'step': 17135, 'epoch': 3} {'type': 'loss', 'content': 0.14330080151557922, 'timestamp': '2025-10-01 04:40:49.263869', 'step': 17136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:49.317608', 'step': 17136, 'epoch': 3} {'type': 'loss', 'content': 0.02028692327439785, 'timestamp': '2025-10-01 04:40:49.319953', 'step': 17137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:49.373534', 'step': 17137, 'epoch': 3} {'type': 'loss', 'content': 0.15253007411956787, 'timestamp': '2025-10-01 04:40:49.375643', 'step': 17138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:49.429051', 'step': 17138, 'epoch': 3} {'type': 'loss', 'content': 0.07353458553552628, 'timestamp': '2025-10-01 04:40:49.431673', 'step': 17139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:40:49.485843', 'step': 17139, 'epoch': 3} {'type': 'loss', 'content': 0.10306508094072342, 'timestamp': '2025-10-01 04:40:49.492092', 'step': 17140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:49.545191', 'step': 17140, 'epoch': 3} {'type': 'loss', 'content': 0.062423985451459885, 'timestamp': '2025-10-01 04:40:49.547672', 'step': 17141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:49.601655', 'step': 17141, 'epoch': 3} {'type': 'loss', 'content': 0.06361818313598633, 'timestamp': '2025-10-01 04:40:49.603774', 'step': 17142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:49.657135', 'step': 17142, 'epoch': 3} {'type': 'loss', 'content': 0.16773059964179993, 'timestamp': '2025-10-01 04:40:49.659248', 'step': 17143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:49.716465', 'step': 17143, 'epoch': 3} {'type': 'loss', 'content': 0.12562443315982819, 'timestamp': '2025-10-01 04:40:49.722428', 'step': 17144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:49.775014', 'step': 17144, 'epoch': 3} {'type': 'loss', 'content': 0.1077113151550293, 'timestamp': '2025-10-01 04:40:49.778716', 'step': 17145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:49.831481', 'step': 17145, 'epoch': 3} {'type': 'loss', 'content': 0.1429724395275116, 'timestamp': '2025-10-01 04:40:49.833581', 'step': 17146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:49.889654', 'step': 17146, 'epoch': 3} {'type': 'loss', 'content': 0.09205858409404755, 'timestamp': '2025-10-01 04:40:49.891810', 'step': 17147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:49.948205', 'step': 17147, 'epoch': 3} {'type': 'loss', 'content': 0.07980749756097794, 'timestamp': '2025-10-01 04:40:49.955009', 'step': 17148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:40:50.009538', 'step': 17148, 'epoch': 3} {'type': 'loss', 'content': 0.0702754408121109, 'timestamp': '2025-10-01 04:40:50.011821', 'step': 17149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:50.066938', 'step': 17149, 'epoch': 3} {'type': 'loss', 'content': 0.08565566688776016, 'timestamp': '2025-10-01 04:40:50.069180', 'step': 17150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:50.123424', 'step': 17150, 'epoch': 3} {'type': 'loss', 'content': 0.10418090224266052, 'timestamp': '2025-10-01 04:40:50.125653', 'step': 17151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:50.181264', 'step': 17151, 'epoch': 3} {'type': 'loss', 'content': 0.11744625121355057, 'timestamp': '2025-10-01 04:40:50.187727', 'step': 17152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:50.241672', 'step': 17152, 'epoch': 3} {'type': 'loss', 'content': 0.07306879013776779, 'timestamp': '2025-10-01 04:40:50.243944', 'step': 17153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:50.300636', 'step': 17153, 'epoch': 3} {'type': 'loss', 'content': 0.09940005838871002, 'timestamp': '2025-10-01 04:40:50.302911', 'step': 17154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:50.357202', 'step': 17154, 'epoch': 3} {'type': 'loss', 'content': 0.10262919962406158, 'timestamp': '2025-10-01 04:40:50.359544', 'step': 17155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:50.413863', 'step': 17155, 'epoch': 3} {'type': 'loss', 'content': 0.08648116886615753, 'timestamp': '2025-10-01 04:40:50.420193', 'step': 17156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:50.472877', 'step': 17156, 'epoch': 3} {'type': 'loss', 'content': 0.14179252088069916, 'timestamp': '2025-10-01 04:40:50.479466', 'step': 17157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:50.535856', 'step': 17157, 'epoch': 3} {'type': 'loss', 'content': 0.09416742622852325, 'timestamp': '2025-10-01 04:40:50.544839', 'step': 17158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:50.599755', 'step': 17158, 'epoch': 3} {'type': 'loss', 'content': 0.07304452359676361, 'timestamp': '2025-10-01 04:40:50.603352', 'step': 17159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:50.656617', 'step': 17159, 'epoch': 3} {'type': 'loss', 'content': 0.02794725075364113, 'timestamp': '2025-10-01 04:40:50.662820', 'step': 17160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:50.715809', 'step': 17160, 'epoch': 3} {'type': 'loss', 'content': 0.16056948900222778, 'timestamp': '2025-10-01 04:40:50.717960', 'step': 17161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:50.771252', 'step': 17161, 'epoch': 3} {'type': 'loss', 'content': 0.09135497361421585, 'timestamp': '2025-10-01 04:40:50.773721', 'step': 17162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:40:50.828711', 'step': 17162, 'epoch': 3} {'type': 'loss', 'content': 0.16169194877147675, 'timestamp': '2025-10-01 04:40:50.830974', 'step': 17163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:40:50.884767', 'step': 17163, 'epoch': 3} {'type': 'loss', 'content': 0.04563349485397339, 'timestamp': '2025-10-01 04:40:50.891132', 'step': 17164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:50.952154', 'step': 17164, 'epoch': 3} {'type': 'loss', 'content': 0.1682194322347641, 'timestamp': '2025-10-01 04:40:50.954387', 'step': 17165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:40:51.008490', 'step': 17165, 'epoch': 3} {'type': 'loss', 'content': 0.12923237681388855, 'timestamp': '2025-10-01 04:40:51.010671', 'step': 17166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:51.064187', 'step': 17166, 'epoch': 3} {'type': 'loss', 'content': 0.09314563125371933, 'timestamp': '2025-10-01 04:40:51.066263', 'step': 17167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:51.120021', 'step': 17167, 'epoch': 3} {'type': 'loss', 'content': 0.07623262703418732, 'timestamp': '2025-10-01 04:40:51.126230', 'step': 17168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:51.180181', 'step': 17168, 'epoch': 3} {'type': 'loss', 'content': 0.06790778785943985, 'timestamp': '2025-10-01 04:40:51.182405', 'step': 17169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:51.236264', 'step': 17169, 'epoch': 3} {'type': 'loss', 'content': 0.11313702166080475, 'timestamp': '2025-10-01 04:40:51.238600', 'step': 17170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:40:51.292342', 'step': 17170, 'epoch': 3} {'type': 'loss', 'content': 0.07528278231620789, 'timestamp': '2025-10-01 04:40:51.294530', 'step': 17171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:51.347373', 'step': 17171, 'epoch': 3} {'type': 'loss', 'content': 0.1119987964630127, 'timestamp': '2025-10-01 04:40:51.353738', 'step': 17172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:51.406729', 'step': 17172, 'epoch': 3} {'type': 'loss', 'content': 0.07967919856309891, 'timestamp': '2025-10-01 04:40:51.408983', 'step': 17173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:51.462150', 'step': 17173, 'epoch': 3} {'type': 'loss', 'content': 0.10920345038175583, 'timestamp': '2025-10-01 04:40:51.464773', 'step': 17174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:40:51.518687', 'step': 17174, 'epoch': 3} {'type': 'loss', 'content': 0.09926947951316833, 'timestamp': '2025-10-01 04:40:51.521132', 'step': 17175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:51.580694', 'step': 17175, 'epoch': 3} {'type': 'loss', 'content': 0.06114320084452629, 'timestamp': '2025-10-01 04:40:51.586899', 'step': 17176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:51.639761', 'step': 17176, 'epoch': 3} {'type': 'loss', 'content': 0.16732852160930634, 'timestamp': '2025-10-01 04:40:51.642220', 'step': 17177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:51.695683', 'step': 17177, 'epoch': 3} {'type': 'loss', 'content': 0.05083772912621498, 'timestamp': '2025-10-01 04:40:51.698134', 'step': 17178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:51.752160', 'step': 17178, 'epoch': 3} {'type': 'loss', 'content': 0.14210021495819092, 'timestamp': '2025-10-01 04:40:51.754886', 'step': 17179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:51.822941', 'step': 17179, 'epoch': 3} {'type': 'loss', 'content': 0.04672889783978462, 'timestamp': '2025-10-01 04:40:51.830568', 'step': 17180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:51.900529', 'step': 17180, 'epoch': 3} {'type': 'loss', 'content': 0.1548372358083725, 'timestamp': '2025-10-01 04:40:51.920953', 'step': 17181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:51.983141', 'step': 17181, 'epoch': 3} {'type': 'loss', 'content': 0.13341887295246124, 'timestamp': '2025-10-01 04:40:51.985646', 'step': 17182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:52.050966', 'step': 17182, 'epoch': 3} {'type': 'loss', 'content': 0.08068639785051346, 'timestamp': '2025-10-01 04:40:52.054697', 'step': 17183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:52.112875', 'step': 17183, 'epoch': 3} {'type': 'loss', 'content': 0.07910386472940445, 'timestamp': '2025-10-01 04:40:52.122325', 'step': 17184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:52.179230', 'step': 17184, 'epoch': 3} {'type': 'loss', 'content': 0.11969715356826782, 'timestamp': '2025-10-01 04:40:52.181822', 'step': 17185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:52.243182', 'step': 17185, 'epoch': 3} {'type': 'loss', 'content': 0.10577721893787384, 'timestamp': '2025-10-01 04:40:52.246892', 'step': 17186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:52.311779', 'step': 17186, 'epoch': 3} {'type': 'loss', 'content': 0.11641770601272583, 'timestamp': '2025-10-01 04:40:52.315158', 'step': 17187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:52.394608', 'step': 17187, 'epoch': 3} {'type': 'loss', 'content': 0.029484929516911507, 'timestamp': '2025-10-01 04:40:52.407112', 'step': 17188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:52.493610', 'step': 17188, 'epoch': 3} {'type': 'loss', 'content': 0.19038626551628113, 'timestamp': '2025-10-01 04:40:52.499950', 'step': 17189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:52.565174', 'step': 17189, 'epoch': 3} {'type': 'loss', 'content': 0.05329757183790207, 'timestamp': '2025-10-01 04:40:52.568027', 'step': 17190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:52.646806', 'step': 17190, 'epoch': 3} {'type': 'loss', 'content': 0.08962411433458328, 'timestamp': '2025-10-01 04:40:52.655387', 'step': 17191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:52.737431', 'step': 17191, 'epoch': 3} {'type': 'loss', 'content': 0.12482579797506332, 'timestamp': '2025-10-01 04:40:52.746784', 'step': 17192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:52.818109', 'step': 17192, 'epoch': 3} {'type': 'loss', 'content': 0.09090427309274673, 'timestamp': '2025-10-01 04:40:52.826148', 'step': 17193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:52.888444', 'step': 17193, 'epoch': 3} {'type': 'loss', 'content': 0.092681385576725, 'timestamp': '2025-10-01 04:40:52.891205', 'step': 17194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:53.013452', 'step': 17194, 'epoch': 3} {'type': 'loss', 'content': 0.10362694412469864, 'timestamp': '2025-10-01 04:40:53.022106', 'step': 17195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:53.083922', 'step': 17195, 'epoch': 3} {'type': 'loss', 'content': 0.13435280323028564, 'timestamp': '2025-10-01 04:40:53.093321', 'step': 17196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:53.164016', 'step': 17196, 'epoch': 3} {'type': 'loss', 'content': 0.09112579375505447, 'timestamp': '2025-10-01 04:40:53.166281', 'step': 17197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:53.222690', 'step': 17197, 'epoch': 3} {'type': 'loss', 'content': 0.12561218440532684, 'timestamp': '2025-10-01 04:40:53.225066', 'step': 17198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:53.281119', 'step': 17198, 'epoch': 3} {'type': 'loss', 'content': 0.10027658939361572, 'timestamp': '2025-10-01 04:40:53.283457', 'step': 17199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:53.340287', 'step': 17199, 'epoch': 3} {'type': 'loss', 'content': 0.14014005661010742, 'timestamp': '2025-10-01 04:40:53.347042', 'step': 17200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:53.403259', 'step': 17200, 'epoch': 3} {'type': 'loss', 'content': 0.15075422823429108, 'timestamp': '2025-10-01 04:40:53.405531', 'step': 17201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:53.461954', 'step': 17201, 'epoch': 3} {'type': 'loss', 'content': 0.10337555408477783, 'timestamp': '2025-10-01 04:40:53.464209', 'step': 17202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:53.521099', 'step': 17202, 'epoch': 3} {'type': 'loss', 'content': 0.12942926585674286, 'timestamp': '2025-10-01 04:40:53.523341', 'step': 17203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:53.579942', 'step': 17203, 'epoch': 3} {'type': 'loss', 'content': 0.10964028537273407, 'timestamp': '2025-10-01 04:40:53.586667', 'step': 17204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:53.641866', 'step': 17204, 'epoch': 3} {'type': 'loss', 'content': 0.06393341720104218, 'timestamp': '2025-10-01 04:40:53.644142', 'step': 17205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:40:53.698089', 'step': 17205, 'epoch': 3} {'type': 'loss', 'content': 0.048564594238996506, 'timestamp': '2025-10-01 04:40:53.700668', 'step': 17206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:53.754237', 'step': 17206, 'epoch': 3} {'type': 'loss', 'content': 0.038247060030698776, 'timestamp': '2025-10-01 04:40:53.756598', 'step': 17207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:53.809639', 'step': 17207, 'epoch': 3} {'type': 'loss', 'content': 0.1015322133898735, 'timestamp': '2025-10-01 04:40:53.815820', 'step': 17208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:40:53.868603', 'step': 17208, 'epoch': 3} {'type': 'loss', 'content': 0.12948757410049438, 'timestamp': '2025-10-01 04:40:53.871070', 'step': 17209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:53.924340', 'step': 17209, 'epoch': 3} {'type': 'loss', 'content': 0.11826498806476593, 'timestamp': '2025-10-01 04:40:53.926657', 'step': 17210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:53.980872', 'step': 17210, 'epoch': 3} {'type': 'loss', 'content': 0.17850318551063538, 'timestamp': '2025-10-01 04:40:53.983071', 'step': 17211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:54.037937', 'step': 17211, 'epoch': 3} {'type': 'loss', 'content': 0.12676256895065308, 'timestamp': '2025-10-01 04:40:54.044086', 'step': 17212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:54.099902', 'step': 17212, 'epoch': 3} {'type': 'loss', 'content': 0.09009997546672821, 'timestamp': '2025-10-01 04:40:54.102152', 'step': 17213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:54.164817', 'step': 17213, 'epoch': 3} {'type': 'loss', 'content': 0.10175490379333496, 'timestamp': '2025-10-01 04:40:54.167074', 'step': 17214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:54.231133', 'step': 17214, 'epoch': 3} {'type': 'loss', 'content': 0.1476055234670639, 'timestamp': '2025-10-01 04:40:54.233519', 'step': 17215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:54.289391', 'step': 17215, 'epoch': 3} {'type': 'loss', 'content': 0.031149543821811676, 'timestamp': '2025-10-01 04:40:54.295873', 'step': 17216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:54.352782', 'step': 17216, 'epoch': 3} {'type': 'loss', 'content': 0.10369394719600677, 'timestamp': '2025-10-01 04:40:54.355030', 'step': 17217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:54.410603', 'step': 17217, 'epoch': 3} {'type': 'loss', 'content': 0.0510246641933918, 'timestamp': '2025-10-01 04:40:54.412896', 'step': 17218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:54.466602', 'step': 17218, 'epoch': 3} {'type': 'loss', 'content': 0.1006099209189415, 'timestamp': '2025-10-01 04:40:54.468675', 'step': 17219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:54.525854', 'step': 17219, 'epoch': 3} {'type': 'loss', 'content': 0.09789341688156128, 'timestamp': '2025-10-01 04:40:54.532434', 'step': 17220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:54.590336', 'step': 17220, 'epoch': 3} {'type': 'loss', 'content': 0.05033233389258385, 'timestamp': '2025-10-01 04:40:54.592665', 'step': 17221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:54.655001', 'step': 17221, 'epoch': 3} {'type': 'loss', 'content': 0.0400606244802475, 'timestamp': '2025-10-01 04:40:54.657240', 'step': 17222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:54.715763', 'step': 17222, 'epoch': 3} {'type': 'loss', 'content': 0.1458924263715744, 'timestamp': '2025-10-01 04:40:54.718138', 'step': 17223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:54.776491', 'step': 17223, 'epoch': 3} {'type': 'loss', 'content': 0.10390722006559372, 'timestamp': '2025-10-01 04:40:54.783454', 'step': 17224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:54.842139', 'step': 17224, 'epoch': 3} {'type': 'loss', 'content': 0.10221441090106964, 'timestamp': '2025-10-01 04:40:54.844226', 'step': 17225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:54.899374', 'step': 17225, 'epoch': 3} {'type': 'loss', 'content': 0.10694501549005508, 'timestamp': '2025-10-01 04:40:54.901796', 'step': 17226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:54.959546', 'step': 17226, 'epoch': 3} {'type': 'loss', 'content': 0.04061811789870262, 'timestamp': '2025-10-01 04:40:54.962056', 'step': 17227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:55.016938', 'step': 17227, 'epoch': 3} {'type': 'loss', 'content': 0.1562679409980774, 'timestamp': '2025-10-01 04:40:55.025666', 'step': 17228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:55.080678', 'step': 17228, 'epoch': 3} {'type': 'loss', 'content': 0.11210419237613678, 'timestamp': '2025-10-01 04:40:55.082960', 'step': 17229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:55.137597', 'step': 17229, 'epoch': 3} {'type': 'loss', 'content': 0.16495653986930847, 'timestamp': '2025-10-01 04:40:55.140807', 'step': 17230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:55.197178', 'step': 17230, 'epoch': 3} {'type': 'loss', 'content': 0.0416935570538044, 'timestamp': '2025-10-01 04:40:55.199582', 'step': 17231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:55.254245', 'step': 17231, 'epoch': 3} {'type': 'loss', 'content': 0.05459960177540779, 'timestamp': '2025-10-01 04:40:55.260633', 'step': 17232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:40:55.317088', 'step': 17232, 'epoch': 3} {'type': 'loss', 'content': 0.06490764021873474, 'timestamp': '2025-10-01 04:40:55.319876', 'step': 17233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:55.380401', 'step': 17233, 'epoch': 3} {'type': 'loss', 'content': 0.07006995379924774, 'timestamp': '2025-10-01 04:40:55.383342', 'step': 17234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:55.456817', 'step': 17234, 'epoch': 3} {'type': 'loss', 'content': 0.0770362839102745, 'timestamp': '2025-10-01 04:40:55.459393', 'step': 17235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:40:55.522027', 'step': 17235, 'epoch': 3} {'type': 'loss', 'content': 0.07502727210521698, 'timestamp': '2025-10-01 04:40:55.528587', 'step': 17236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:40:55.591611', 'step': 17236, 'epoch': 3} {'type': 'loss', 'content': 0.04437801241874695, 'timestamp': '2025-10-01 04:40:55.593834', 'step': 17237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:55.649274', 'step': 17237, 'epoch': 3} {'type': 'loss', 'content': 0.05537128075957298, 'timestamp': '2025-10-01 04:40:55.655203', 'step': 17238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:55.713737', 'step': 17238, 'epoch': 3} {'type': 'loss', 'content': 0.11519906669855118, 'timestamp': '2025-10-01 04:40:55.716666', 'step': 17239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:40:55.772165', 'step': 17239, 'epoch': 3} {'type': 'loss', 'content': 0.031027665361762047, 'timestamp': '2025-10-01 04:40:55.778837', 'step': 17240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:40:55.836356', 'step': 17240, 'epoch': 3} {'type': 'loss', 'content': 0.1160675659775734, 'timestamp': '2025-10-01 04:40:55.838674', 'step': 17241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:40:55.892572', 'step': 17241, 'epoch': 3} {'type': 'loss', 'content': 0.08715329319238663, 'timestamp': '2025-10-01 04:40:55.895355', 'step': 17242, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-01 04:41:09.057818', 'step': 17242, 'epoch': 3} {'type': 'pplx', 'content': 11244.08830117211, 'timestamp': '2025-10-01 04:41:09.060800', 'step': 17242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:09.116747', 'step': 17242, 'epoch': 3} {'type': 'loss', 'content': 0.07019030302762985, 'timestamp': '2025-10-01 04:41:09.119124', 'step': 17243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:09.177674', 'step': 17243, 'epoch': 3} {'type': 'loss', 'content': 0.1427658647298813, 'timestamp': '2025-10-01 04:41:09.186169', 'step': 17244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:09.253999', 'step': 17244, 'epoch': 3} {'type': 'loss', 'content': 0.16735942661762238, 'timestamp': '2025-10-01 04:41:09.263253', 'step': 17245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:09.316203', 'step': 17245, 'epoch': 3} {'type': 'loss', 'content': 0.05149851739406586, 'timestamp': '2025-10-01 04:41:09.318583', 'step': 17246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:09.375273', 'step': 17246, 'epoch': 3} {'type': 'loss', 'content': 0.07631521672010422, 'timestamp': '2025-10-01 04:41:09.377708', 'step': 17247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:09.431569', 'step': 17247, 'epoch': 3} {'type': 'loss', 'content': 0.06794069707393646, 'timestamp': '2025-10-01 04:41:09.437651', 'step': 17248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:09.490706', 'step': 17248, 'epoch': 3} {'type': 'loss', 'content': 0.12217088043689728, 'timestamp': '2025-10-01 04:41:09.497188', 'step': 17249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:09.561809', 'step': 17249, 'epoch': 3} {'type': 'loss', 'content': 0.037427544593811035, 'timestamp': '2025-10-01 04:41:09.563986', 'step': 17250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:09.626566', 'step': 17250, 'epoch': 3} {'type': 'loss', 'content': 0.08937349170446396, 'timestamp': '2025-10-01 04:41:09.628833', 'step': 17251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:09.681958', 'step': 17251, 'epoch': 3} {'type': 'loss', 'content': 0.16143549978733063, 'timestamp': '2025-10-01 04:41:09.688099', 'step': 17252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:09.744136', 'step': 17252, 'epoch': 3} {'type': 'loss', 'content': 0.07044633477926254, 'timestamp': '2025-10-01 04:41:09.756642', 'step': 17253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:41:09.811110', 'step': 17253, 'epoch': 3} {'type': 'loss', 'content': 0.12162293493747711, 'timestamp': '2025-10-01 04:41:09.813500', 'step': 17254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:09.876896', 'step': 17254, 'epoch': 3} {'type': 'loss', 'content': 0.09577663242816925, 'timestamp': '2025-10-01 04:41:09.879633', 'step': 17255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:09.938122', 'step': 17255, 'epoch': 3} {'type': 'loss', 'content': 0.0969768688082695, 'timestamp': '2025-10-01 04:41:09.944885', 'step': 17256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:09.999373', 'step': 17256, 'epoch': 3} {'type': 'loss', 'content': 0.07455937564373016, 'timestamp': '2025-10-01 04:41:10.002054', 'step': 17257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:10.056626', 'step': 17257, 'epoch': 3} {'type': 'loss', 'content': 0.039817020297050476, 'timestamp': '2025-10-01 04:41:10.059026', 'step': 17258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:10.112861', 'step': 17258, 'epoch': 3} {'type': 'loss', 'content': 0.05728110671043396, 'timestamp': '2025-10-01 04:41:10.115116', 'step': 17259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:10.168667', 'step': 17259, 'epoch': 3} {'type': 'loss', 'content': 0.12426936626434326, 'timestamp': '2025-10-01 04:41:10.175047', 'step': 17260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:10.230004', 'step': 17260, 'epoch': 3} {'type': 'loss', 'content': 0.05786331743001938, 'timestamp': '2025-10-01 04:41:10.232407', 'step': 17261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:10.286371', 'step': 17261, 'epoch': 3} {'type': 'loss', 'content': 0.1019243523478508, 'timestamp': '2025-10-01 04:41:10.288870', 'step': 17262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:10.342602', 'step': 17262, 'epoch': 3} {'type': 'loss', 'content': 0.12816071510314941, 'timestamp': '2025-10-01 04:41:10.344939', 'step': 17263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:10.398856', 'step': 17263, 'epoch': 3} {'type': 'loss', 'content': 0.08883006870746613, 'timestamp': '2025-10-01 04:41:10.405153', 'step': 17264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:10.458649', 'step': 17264, 'epoch': 3} {'type': 'loss', 'content': 0.11701414734125137, 'timestamp': '2025-10-01 04:41:10.460933', 'step': 17265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:10.514329', 'step': 17265, 'epoch': 3} {'type': 'loss', 'content': 0.07833902537822723, 'timestamp': '2025-10-01 04:41:10.516463', 'step': 17266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:10.570156', 'step': 17266, 'epoch': 3} {'type': 'loss', 'content': 0.051327042281627655, 'timestamp': '2025-10-01 04:41:10.572317', 'step': 17267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:10.625617', 'step': 17267, 'epoch': 3} {'type': 'loss', 'content': 0.06390002369880676, 'timestamp': '2025-10-01 04:41:10.631672', 'step': 17268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:10.685121', 'step': 17268, 'epoch': 3} {'type': 'loss', 'content': 0.1375739574432373, 'timestamp': '2025-10-01 04:41:10.687303', 'step': 17269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:10.739949', 'step': 17269, 'epoch': 3} {'type': 'loss', 'content': 0.06367244571447372, 'timestamp': '2025-10-01 04:41:10.742118', 'step': 17270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:10.795338', 'step': 17270, 'epoch': 3} {'type': 'loss', 'content': 0.17345616221427917, 'timestamp': '2025-10-01 04:41:10.797713', 'step': 17271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:10.850766', 'step': 17271, 'epoch': 3} {'type': 'loss', 'content': 0.04415706545114517, 'timestamp': '2025-10-01 04:41:10.856844', 'step': 17272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:10.909868', 'step': 17272, 'epoch': 3} {'type': 'loss', 'content': 0.09213273972272873, 'timestamp': '2025-10-01 04:41:10.912060', 'step': 17273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:41:10.964934', 'step': 17273, 'epoch': 3} {'type': 'loss', 'content': 0.03644063323736191, 'timestamp': '2025-10-01 04:41:10.967134', 'step': 17274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:11.020418', 'step': 17274, 'epoch': 3} {'type': 'loss', 'content': 0.06915944814682007, 'timestamp': '2025-10-01 04:41:11.022668', 'step': 17275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:11.075772', 'step': 17275, 'epoch': 3} {'type': 'loss', 'content': 0.07443396747112274, 'timestamp': '2025-10-01 04:41:11.081791', 'step': 17276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:11.134232', 'step': 17276, 'epoch': 3} {'type': 'loss', 'content': 0.09833730757236481, 'timestamp': '2025-10-01 04:41:11.136351', 'step': 17277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:11.189592', 'step': 17277, 'epoch': 3} {'type': 'loss', 'content': 0.06118538975715637, 'timestamp': '2025-10-01 04:41:11.191774', 'step': 17278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:41:11.244714', 'step': 17278, 'epoch': 3} {'type': 'loss', 'content': 0.07292981445789337, 'timestamp': '2025-10-01 04:41:11.246949', 'step': 17279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:11.300358', 'step': 17279, 'epoch': 3} {'type': 'loss', 'content': 0.15542522072792053, 'timestamp': '2025-10-01 04:41:11.306296', 'step': 17280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:11.361328', 'step': 17280, 'epoch': 3} {'type': 'loss', 'content': 0.09172389656305313, 'timestamp': '2025-10-01 04:41:11.363548', 'step': 17281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:11.417376', 'step': 17281, 'epoch': 3} {'type': 'loss', 'content': 0.12693053483963013, 'timestamp': '2025-10-01 04:41:11.419726', 'step': 17282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:11.473166', 'step': 17282, 'epoch': 3} {'type': 'loss', 'content': 0.19601155817508698, 'timestamp': '2025-10-01 04:41:11.476033', 'step': 17283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:41:11.529350', 'step': 17283, 'epoch': 3} {'type': 'loss', 'content': 0.2408902794122696, 'timestamp': '2025-10-01 04:41:11.535253', 'step': 17284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:11.587598', 'step': 17284, 'epoch': 3} {'type': 'loss', 'content': 0.10877984762191772, 'timestamp': '2025-10-01 04:41:11.589803', 'step': 17285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:11.642650', 'step': 17285, 'epoch': 3} {'type': 'loss', 'content': 0.14293363690376282, 'timestamp': '2025-10-01 04:41:11.644938', 'step': 17286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:11.698017', 'step': 17286, 'epoch': 3} {'type': 'loss', 'content': 0.1489105373620987, 'timestamp': '2025-10-01 04:41:11.700338', 'step': 17287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:11.753158', 'step': 17287, 'epoch': 3} {'type': 'loss', 'content': 0.06333138793706894, 'timestamp': '2025-10-01 04:41:11.758993', 'step': 17288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:11.813435', 'step': 17288, 'epoch': 3} {'type': 'loss', 'content': 0.1126556321978569, 'timestamp': '2025-10-01 04:41:11.815716', 'step': 17289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:11.868589', 'step': 17289, 'epoch': 3} {'type': 'loss', 'content': 0.04480304196476936, 'timestamp': '2025-10-01 04:41:11.871103', 'step': 17290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:11.924098', 'step': 17290, 'epoch': 3} {'type': 'loss', 'content': 0.04218754917383194, 'timestamp': '2025-10-01 04:41:11.926296', 'step': 17291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:11.979413', 'step': 17291, 'epoch': 3} {'type': 'loss', 'content': 0.06916381418704987, 'timestamp': '2025-10-01 04:41:11.985311', 'step': 17292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:12.037904', 'step': 17292, 'epoch': 3} {'type': 'loss', 'content': 0.11668110638856888, 'timestamp': '2025-10-01 04:41:12.040059', 'step': 17293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:12.093015', 'step': 17293, 'epoch': 3} {'type': 'loss', 'content': 0.10009448975324631, 'timestamp': '2025-10-01 04:41:12.095229', 'step': 17294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:12.148713', 'step': 17294, 'epoch': 3} {'type': 'loss', 'content': 0.07304731011390686, 'timestamp': '2025-10-01 04:41:12.150909', 'step': 17295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:12.203937', 'step': 17295, 'epoch': 3} {'type': 'loss', 'content': 0.1265837401151657, 'timestamp': '2025-10-01 04:41:12.209683', 'step': 17296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:12.262623', 'step': 17296, 'epoch': 3} {'type': 'loss', 'content': 0.10224846005439758, 'timestamp': '2025-10-01 04:41:12.264812', 'step': 17297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:12.317612', 'step': 17297, 'epoch': 3} {'type': 'loss', 'content': 0.10792829841375351, 'timestamp': '2025-10-01 04:41:12.319767', 'step': 17298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:12.373154', 'step': 17298, 'epoch': 3} {'type': 'loss', 'content': 0.0704009085893631, 'timestamp': '2025-10-01 04:41:12.375425', 'step': 17299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:12.428601', 'step': 17299, 'epoch': 3} {'type': 'loss', 'content': 0.05811329558491707, 'timestamp': '2025-10-01 04:41:12.434399', 'step': 17300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:12.487203', 'step': 17300, 'epoch': 3} {'type': 'loss', 'content': 0.07407543063163757, 'timestamp': '2025-10-01 04:41:12.489615', 'step': 17301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:41:12.543475', 'step': 17301, 'epoch': 3} {'type': 'loss', 'content': 0.07468484342098236, 'timestamp': '2025-10-01 04:41:12.546494', 'step': 17302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:12.600526', 'step': 17302, 'epoch': 3} {'type': 'loss', 'content': 0.13587896525859833, 'timestamp': '2025-10-01 04:41:12.603000', 'step': 17303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:41:12.656388', 'step': 17303, 'epoch': 3} {'type': 'loss', 'content': 0.14778916537761688, 'timestamp': '2025-10-01 04:41:12.662350', 'step': 17304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:12.715429', 'step': 17304, 'epoch': 3} {'type': 'loss', 'content': 0.07418383657932281, 'timestamp': '2025-10-01 04:41:12.717899', 'step': 17305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:12.771445', 'step': 17305, 'epoch': 3} {'type': 'loss', 'content': 0.11714322865009308, 'timestamp': '2025-10-01 04:41:12.774133', 'step': 17306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:12.828219', 'step': 17306, 'epoch': 3} {'type': 'loss', 'content': 0.11914203315973282, 'timestamp': '2025-10-01 04:41:12.831261', 'step': 17307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:12.885077', 'step': 17307, 'epoch': 3} {'type': 'loss', 'content': 0.05678349360823631, 'timestamp': '2025-10-01 04:41:12.891628', 'step': 17308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:12.945082', 'step': 17308, 'epoch': 3} {'type': 'loss', 'content': 0.12078651040792465, 'timestamp': '2025-10-01 04:41:12.947538', 'step': 17309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:13.001706', 'step': 17309, 'epoch': 3} {'type': 'loss', 'content': 0.12468794733285904, 'timestamp': '2025-10-01 04:41:13.004320', 'step': 17310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:41:13.058120', 'step': 17310, 'epoch': 3} {'type': 'loss', 'content': 0.16035588085651398, 'timestamp': '2025-10-01 04:41:13.060723', 'step': 17311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:13.114646', 'step': 17311, 'epoch': 3} {'type': 'loss', 'content': 0.13796977698802948, 'timestamp': '2025-10-01 04:41:13.120864', 'step': 17312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:13.174044', 'step': 17312, 'epoch': 3} {'type': 'loss', 'content': 0.13765068352222443, 'timestamp': '2025-10-01 04:41:13.176615', 'step': 17313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:13.230434', 'step': 17313, 'epoch': 3} {'type': 'loss', 'content': 0.07478981465101242, 'timestamp': '2025-10-01 04:41:13.233221', 'step': 17314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:13.286871', 'step': 17314, 'epoch': 3} {'type': 'loss', 'content': 0.16296666860580444, 'timestamp': '2025-10-01 04:41:13.289575', 'step': 17315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:13.344538', 'step': 17315, 'epoch': 3} {'type': 'loss', 'content': 0.132991760969162, 'timestamp': '2025-10-01 04:41:13.350398', 'step': 17316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:13.404162', 'step': 17316, 'epoch': 3} {'type': 'loss', 'content': 0.04870975762605667, 'timestamp': '2025-10-01 04:41:13.406938', 'step': 17317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:13.460135', 'step': 17317, 'epoch': 3} {'type': 'loss', 'content': 0.07079969346523285, 'timestamp': '2025-10-01 04:41:13.462753', 'step': 17318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:13.517324', 'step': 17318, 'epoch': 3} {'type': 'loss', 'content': 0.18652895092964172, 'timestamp': '2025-10-01 04:41:13.520015', 'step': 17319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:41:13.575456', 'step': 17319, 'epoch': 3} {'type': 'loss', 'content': 0.07203453034162521, 'timestamp': '2025-10-01 04:41:13.581747', 'step': 17320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:13.635318', 'step': 17320, 'epoch': 3} {'type': 'loss', 'content': 0.09025164693593979, 'timestamp': '2025-10-01 04:41:13.638126', 'step': 17321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:13.692651', 'step': 17321, 'epoch': 3} {'type': 'loss', 'content': 0.04527456313371658, 'timestamp': '2025-10-01 04:41:13.695318', 'step': 17322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:13.749979', 'step': 17322, 'epoch': 3} {'type': 'loss', 'content': 0.09663894772529602, 'timestamp': '2025-10-01 04:41:13.752531', 'step': 17323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:13.806812', 'step': 17323, 'epoch': 3} {'type': 'loss', 'content': 0.06082445755600929, 'timestamp': '2025-10-01 04:41:13.812950', 'step': 17324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:13.866036', 'step': 17324, 'epoch': 3} {'type': 'loss', 'content': 0.05303126201033592, 'timestamp': '2025-10-01 04:41:13.868615', 'step': 17325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:13.923081', 'step': 17325, 'epoch': 3} {'type': 'loss', 'content': 0.06316357851028442, 'timestamp': '2025-10-01 04:41:13.925773', 'step': 17326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:13.979601', 'step': 17326, 'epoch': 3} {'type': 'loss', 'content': 0.0674278512597084, 'timestamp': '2025-10-01 04:41:13.982241', 'step': 17327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:14.035942', 'step': 17327, 'epoch': 3} {'type': 'loss', 'content': 0.04560694098472595, 'timestamp': '2025-10-01 04:41:14.041614', 'step': 17328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:14.094507', 'step': 17328, 'epoch': 3} {'type': 'loss', 'content': 0.16354264318943024, 'timestamp': '2025-10-01 04:41:14.096779', 'step': 17329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:14.152487', 'step': 17329, 'epoch': 3} {'type': 'loss', 'content': 0.08533481508493423, 'timestamp': '2025-10-01 04:41:14.155245', 'step': 17330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:14.209650', 'step': 17330, 'epoch': 3} {'type': 'loss', 'content': 0.1478225141763687, 'timestamp': '2025-10-01 04:41:14.212017', 'step': 17331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:14.264938', 'step': 17331, 'epoch': 3} {'type': 'loss', 'content': 0.128190279006958, 'timestamp': '2025-10-01 04:41:14.270738', 'step': 17332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:14.325028', 'step': 17332, 'epoch': 3} {'type': 'loss', 'content': 0.07366609573364258, 'timestamp': '2025-10-01 04:41:14.327345', 'step': 17333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:14.380953', 'step': 17333, 'epoch': 3} {'type': 'loss', 'content': 0.09670501947402954, 'timestamp': '2025-10-01 04:41:14.383538', 'step': 17334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:14.436735', 'step': 17334, 'epoch': 3} {'type': 'loss', 'content': 0.07457425445318222, 'timestamp': '2025-10-01 04:41:14.439128', 'step': 17335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:41:14.493404', 'step': 17335, 'epoch': 3} {'type': 'loss', 'content': 0.14085906744003296, 'timestamp': '2025-10-01 04:41:14.499155', 'step': 17336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:41:14.551304', 'step': 17336, 'epoch': 3} {'type': 'loss', 'content': 0.127850741147995, 'timestamp': '2025-10-01 04:41:14.553703', 'step': 17337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:14.606808', 'step': 17337, 'epoch': 3} {'type': 'loss', 'content': 0.1706426739692688, 'timestamp': '2025-10-01 04:41:14.608997', 'step': 17338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:14.662333', 'step': 17338, 'epoch': 3} {'type': 'loss', 'content': 0.0823824480175972, 'timestamp': '2025-10-01 04:41:14.664690', 'step': 17339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:14.718757', 'step': 17339, 'epoch': 3} {'type': 'loss', 'content': 0.07416534423828125, 'timestamp': '2025-10-01 04:41:14.724598', 'step': 17340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:14.777084', 'step': 17340, 'epoch': 3} {'type': 'loss', 'content': 0.16423951089382172, 'timestamp': '2025-10-01 04:41:14.779457', 'step': 17341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:14.832651', 'step': 17341, 'epoch': 3} {'type': 'loss', 'content': 0.09653858095407486, 'timestamp': '2025-10-01 04:41:14.834856', 'step': 17342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:14.893012', 'step': 17342, 'epoch': 3} {'type': 'loss', 'content': 0.10057847201824188, 'timestamp': '2025-10-01 04:41:14.895704', 'step': 17343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:14.949206', 'step': 17343, 'epoch': 3} {'type': 'loss', 'content': 0.0494752936065197, 'timestamp': '2025-10-01 04:41:14.955117', 'step': 17344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:15.007742', 'step': 17344, 'epoch': 3} {'type': 'loss', 'content': 0.13161678612232208, 'timestamp': '2025-10-01 04:41:15.009961', 'step': 17345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:15.065912', 'step': 17345, 'epoch': 3} {'type': 'loss', 'content': 0.09568651020526886, 'timestamp': '2025-10-01 04:41:15.069153', 'step': 17346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:15.122301', 'step': 17346, 'epoch': 3} {'type': 'loss', 'content': 0.05027734860777855, 'timestamp': '2025-10-01 04:41:15.124524', 'step': 17347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:15.181014', 'step': 17347, 'epoch': 3} {'type': 'loss', 'content': 0.1255384385585785, 'timestamp': '2025-10-01 04:41:15.186763', 'step': 17348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:15.240119', 'step': 17348, 'epoch': 3} {'type': 'loss', 'content': 0.07705984264612198, 'timestamp': '2025-10-01 04:41:15.243041', 'step': 17349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:41:15.307599', 'step': 17349, 'epoch': 3} {'type': 'loss', 'content': 0.0461539551615715, 'timestamp': '2025-10-01 04:41:15.310288', 'step': 17350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:15.364001', 'step': 17350, 'epoch': 3} {'type': 'loss', 'content': 0.1541242003440857, 'timestamp': '2025-10-01 04:41:15.366190', 'step': 17351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:15.419602', 'step': 17351, 'epoch': 3} {'type': 'loss', 'content': 0.07240046560764313, 'timestamp': '2025-10-01 04:41:15.425476', 'step': 17352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:15.478701', 'step': 17352, 'epoch': 3} {'type': 'loss', 'content': 0.09199321269989014, 'timestamp': '2025-10-01 04:41:15.484548', 'step': 17353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:15.537857', 'step': 17353, 'epoch': 3} {'type': 'loss', 'content': 0.09308592230081558, 'timestamp': '2025-10-01 04:41:15.540123', 'step': 17354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:15.593398', 'step': 17354, 'epoch': 3} {'type': 'loss', 'content': 0.19013473391532898, 'timestamp': '2025-10-01 04:41:15.596086', 'step': 17355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:15.648831', 'step': 17355, 'epoch': 3} {'type': 'loss', 'content': 0.06392579525709152, 'timestamp': '2025-10-01 04:41:15.671461', 'step': 17356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:15.724654', 'step': 17356, 'epoch': 3} {'type': 'loss', 'content': 0.049865227192640305, 'timestamp': '2025-10-01 04:41:15.727204', 'step': 17357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:15.782176', 'step': 17357, 'epoch': 3} {'type': 'loss', 'content': 0.10996051877737045, 'timestamp': '2025-10-01 04:41:15.784590', 'step': 17358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:15.838425', 'step': 17358, 'epoch': 3} {'type': 'loss', 'content': 0.08072379231452942, 'timestamp': '2025-10-01 04:41:15.840735', 'step': 17359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:15.895317', 'step': 17359, 'epoch': 3} {'type': 'loss', 'content': 0.13373760879039764, 'timestamp': '2025-10-01 04:41:15.901250', 'step': 17360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:15.954550', 'step': 17360, 'epoch': 3} {'type': 'loss', 'content': 0.11807684600353241, 'timestamp': '2025-10-01 04:41:15.957063', 'step': 17361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:16.010195', 'step': 17361, 'epoch': 3} {'type': 'loss', 'content': 0.12862184643745422, 'timestamp': '2025-10-01 04:41:16.012704', 'step': 17362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:41:16.065640', 'step': 17362, 'epoch': 3} {'type': 'loss', 'content': 0.09171861410140991, 'timestamp': '2025-10-01 04:41:16.068127', 'step': 17363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:16.123460', 'step': 17363, 'epoch': 3} {'type': 'loss', 'content': 0.1143263727426529, 'timestamp': '2025-10-01 04:41:16.129214', 'step': 17364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:41:16.181395', 'step': 17364, 'epoch': 3} {'type': 'loss', 'content': 0.00993430521339178, 'timestamp': '2025-10-01 04:41:16.183557', 'step': 17365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:16.237741', 'step': 17365, 'epoch': 3} {'type': 'loss', 'content': 0.14758504927158356, 'timestamp': '2025-10-01 04:41:16.240212', 'step': 17366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:16.292921', 'step': 17366, 'epoch': 3} {'type': 'loss', 'content': 0.040378447622060776, 'timestamp': '2025-10-01 04:41:16.295257', 'step': 17367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:16.347999', 'step': 17367, 'epoch': 3} {'type': 'loss', 'content': 0.12636397778987885, 'timestamp': '2025-10-01 04:41:16.353739', 'step': 17368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:16.406285', 'step': 17368, 'epoch': 3} {'type': 'loss', 'content': 0.07611023634672165, 'timestamp': '2025-10-01 04:41:16.408771', 'step': 17369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:16.461567', 'step': 17369, 'epoch': 3} {'type': 'loss', 'content': 0.13052594661712646, 'timestamp': '2025-10-01 04:41:16.463781', 'step': 17370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:16.517576', 'step': 17370, 'epoch': 3} {'type': 'loss', 'content': 0.056020986288785934, 'timestamp': '2025-10-01 04:41:16.519878', 'step': 17371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:16.576948', 'step': 17371, 'epoch': 3} {'type': 'loss', 'content': 0.07827010005712509, 'timestamp': '2025-10-01 04:41:16.582782', 'step': 17372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:16.636598', 'step': 17372, 'epoch': 3} {'type': 'loss', 'content': 0.12364786863327026, 'timestamp': '2025-10-01 04:41:16.638853', 'step': 17373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:16.692736', 'step': 17373, 'epoch': 3} {'type': 'loss', 'content': 0.03649337962269783, 'timestamp': '2025-10-01 04:41:16.695045', 'step': 17374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:16.748466', 'step': 17374, 'epoch': 3} {'type': 'loss', 'content': 0.15798120200634003, 'timestamp': '2025-10-01 04:41:16.750665', 'step': 17375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:16.804009', 'step': 17375, 'epoch': 3} {'type': 'loss', 'content': 0.11364872753620148, 'timestamp': '2025-10-01 04:41:16.809833', 'step': 17376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:16.862567', 'step': 17376, 'epoch': 3} {'type': 'loss', 'content': 0.04977977275848389, 'timestamp': '2025-10-01 04:41:16.864504', 'step': 17377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:16.918197', 'step': 17377, 'epoch': 3} {'type': 'loss', 'content': 0.060390159487724304, 'timestamp': '2025-10-01 04:41:16.920169', 'step': 17378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:16.973780', 'step': 17378, 'epoch': 3} {'type': 'loss', 'content': 0.0862983763217926, 'timestamp': '2025-10-01 04:41:16.975658', 'step': 17379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:17.028753', 'step': 17379, 'epoch': 3} {'type': 'loss', 'content': 0.06127627566456795, 'timestamp': '2025-10-01 04:41:17.034663', 'step': 17380, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:17.087611', 'step': 17380, 'epoch': 3} {'type': 'loss', 'content': 0.09094192087650299, 'timestamp': '2025-10-01 04:41:17.090653', 'step': 17381, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:17.144598', 'step': 17381, 'epoch': 3} {'type': 'loss', 'content': 0.10394313186407089, 'timestamp': '2025-10-01 04:41:17.146779', 'step': 17382, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:17.200763', 'step': 17382, 'epoch': 3} {'type': 'loss', 'content': 0.12583479285240173, 'timestamp': '2025-10-01 04:41:17.203007', 'step': 17383, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:17.257500', 'step': 17383, 'epoch': 3} {'type': 'loss', 'content': 0.07621912658214569, 'timestamp': '2025-10-01 04:41:17.263419', 'step': 17384, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:41:17.319226', 'step': 17384, 'epoch': 3} {'type': 'loss', 'content': 0.11960317939519882, 'timestamp': '2025-10-01 04:41:17.321078', 'step': 17385, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:17.374880', 'step': 17385, 'epoch': 3} {'type': 'loss', 'content': 0.06977013498544693, 'timestamp': '2025-10-01 04:41:17.377054', 'step': 17386, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:17.434411', 'step': 17386, 'epoch': 3} {'type': 'loss', 'content': 0.06627658009529114, 'timestamp': '2025-10-01 04:41:17.436759', 'step': 17387, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:17.490386', 'step': 17387, 'epoch': 3} {'type': 'loss', 'content': 0.16788162291049957, 'timestamp': '2025-10-01 04:41:17.496298', 'step': 17388, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:17.548729', 'step': 17388, 'epoch': 3} {'type': 'loss', 'content': 0.06746575981378555, 'timestamp': '2025-10-01 04:41:17.550938', 'step': 17389, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:17.613129', 'step': 17389, 'epoch': 3} {'type': 'loss', 'content': 0.21792981028556824, 'timestamp': '2025-10-01 04:41:17.615521', 'step': 17390, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:17.669819', 'step': 17390, 'epoch': 3} {'type': 'loss', 'content': 0.08692492544651031, 'timestamp': '2025-10-01 04:41:17.671786', 'step': 17391, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:17.724685', 'step': 17391, 'epoch': 3} {'type': 'loss', 'content': 0.07158602029085159, 'timestamp': '2025-10-01 04:41:17.730194', 'step': 17392, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:17.783424', 'step': 17392, 'epoch': 3} {'type': 'loss', 'content': 0.08115480840206146, 'timestamp': '2025-10-01 04:41:17.785941', 'step': 17393, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:17.838850', 'step': 17393, 'epoch': 3} {'type': 'loss', 'content': 0.14630892872810364, 'timestamp': '2025-10-01 04:41:17.841027', 'step': 17394, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:41:17.894024', 'step': 17394, 'epoch': 3} {'type': 'loss', 'content': 0.051263272762298584, 'timestamp': '2025-10-01 04:41:17.896355', 'step': 17395, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:17.951052', 'step': 17395, 'epoch': 3} {'type': 'loss', 'content': 0.11626391857862473, 'timestamp': '2025-10-01 04:41:17.956894', 'step': 17396, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:18.009194', 'step': 17396, 'epoch': 3} {'type': 'loss', 'content': 0.1268010139465332, 'timestamp': '2025-10-01 04:41:18.011417', 'step': 17397, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:18.065769', 'step': 17397, 'epoch': 3} {'type': 'loss', 'content': 0.03690572455525398, 'timestamp': '2025-10-01 04:41:18.068029', 'step': 17398, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:18.121580', 'step': 17398, 'epoch': 3} {'type': 'loss', 'content': 0.07130944728851318, 'timestamp': '2025-10-01 04:41:18.123404', 'step': 17399, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:18.176646', 'step': 17399, 'epoch': 3} {'type': 'loss', 'content': 0.05043657124042511, 'timestamp': '2025-10-01 04:41:18.184341', 'step': 17400, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:18.236919', 'step': 17400, 'epoch': 3} {'type': 'loss', 'content': 0.13554318249225616, 'timestamp': '2025-10-01 04:41:18.239208', 'step': 17401, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:41:18.294525', 'step': 17401, 'epoch': 3} {'type': 'loss', 'content': 0.06368489563465118, 'timestamp': '2025-10-01 04:41:18.296886', 'step': 17402, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:18.351333', 'step': 17402, 'epoch': 3} {'type': 'loss', 'content': 0.12722273170948029, 'timestamp': '2025-10-01 04:41:18.353482', 'step': 17403, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:18.407074', 'step': 17403, 'epoch': 3} {'type': 'loss', 'content': 0.08545086532831192, 'timestamp': '2025-10-01 04:41:18.412691', 'step': 17404, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:18.465707', 'step': 17404, 'epoch': 3} {'type': 'loss', 'content': 0.14495518803596497, 'timestamp': '2025-10-01 04:41:18.467667', 'step': 17405, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:18.520728', 'step': 17405, 'epoch': 3} {'type': 'loss', 'content': 0.10384407639503479, 'timestamp': '2025-10-01 04:41:18.522832', 'step': 17406, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:18.577209', 'step': 17406, 'epoch': 3} {'type': 'loss', 'content': 0.06424491107463837, 'timestamp': '2025-10-01 04:41:18.579015', 'step': 17407, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:18.632948', 'step': 17407, 'epoch': 3} {'type': 'loss', 'content': 0.08353253453969955, 'timestamp': '2025-10-01 04:41:18.638956', 'step': 17408, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:18.692949', 'step': 17408, 'epoch': 3} {'type': 'loss', 'content': 0.07473672181367874, 'timestamp': '2025-10-01 04:41:18.695246', 'step': 17409, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:18.748686', 'step': 17409, 'epoch': 3} {'type': 'loss', 'content': 0.07756407558917999, 'timestamp': '2025-10-01 04:41:18.752529', 'step': 17410, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:18.812255', 'step': 17410, 'epoch': 3} {'type': 'loss', 'content': 0.09998349100351334, 'timestamp': '2025-10-01 04:41:18.814478', 'step': 17411, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:18.867856', 'step': 17411, 'epoch': 3} {'type': 'loss', 'content': 0.09626395255327225, 'timestamp': '2025-10-01 04:41:18.873316', 'step': 17412, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:18.925941', 'step': 17412, 'epoch': 3} {'type': 'loss', 'content': 0.036355357617139816, 'timestamp': '2025-10-01 04:41:18.927688', 'step': 17413, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:18.980761', 'step': 17413, 'epoch': 3} {'type': 'loss', 'content': 0.1619768887758255, 'timestamp': '2025-10-01 04:41:18.983346', 'step': 17414, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:19.037321', 'step': 17414, 'epoch': 3} {'type': 'loss', 'content': 0.14699293673038483, 'timestamp': '2025-10-01 04:41:19.039783', 'step': 17415, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:19.092641', 'step': 17415, 'epoch': 3} {'type': 'loss', 'content': 0.09191352128982544, 'timestamp': '2025-10-01 04:41:19.098477', 'step': 17416, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:19.153944', 'step': 17416, 'epoch': 3} {'type': 'loss', 'content': 0.1028250902891159, 'timestamp': '2025-10-01 04:41:19.156233', 'step': 17417, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:41:19.209770', 'step': 17417, 'epoch': 3} {'type': 'loss', 'content': 0.10667725652456284, 'timestamp': '2025-10-01 04:41:19.212718', 'step': 17418, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:19.266060', 'step': 17418, 'epoch': 3} {'type': 'loss', 'content': 0.04796776548027992, 'timestamp': '2025-10-01 04:41:19.269104', 'step': 17419, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:19.322243', 'step': 17419, 'epoch': 3} {'type': 'loss', 'content': 0.11066696792840958, 'timestamp': '2025-10-01 04:41:19.328158', 'step': 17420, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:19.381214', 'step': 17420, 'epoch': 3} {'type': 'loss', 'content': 0.13658343255519867, 'timestamp': '2025-10-01 04:41:19.383106', 'step': 17421, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:19.436437', 'step': 17421, 'epoch': 3} {'type': 'loss', 'content': 0.11697963625192642, 'timestamp': '2025-10-01 04:41:19.438981', 'step': 17422, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:19.493298', 'step': 17422, 'epoch': 3} {'type': 'loss', 'content': 0.06507313251495361, 'timestamp': '2025-10-01 04:41:19.495637', 'step': 17423, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:19.552489', 'step': 17423, 'epoch': 3} {'type': 'loss', 'content': 0.061962101608514786, 'timestamp': '2025-10-01 04:41:19.558460', 'step': 17424, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:19.611491', 'step': 17424, 'epoch': 3} {'type': 'loss', 'content': 0.1283290684223175, 'timestamp': '2025-10-01 04:41:19.614310', 'step': 17425, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:19.674678', 'step': 17425, 'epoch': 3} {'type': 'loss', 'content': 0.05209827050566673, 'timestamp': '2025-10-01 04:41:19.677138', 'step': 17426, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:19.730899', 'step': 17426, 'epoch': 3} {'type': 'loss', 'content': 0.0673254132270813, 'timestamp': '2025-10-01 04:41:19.733833', 'step': 17427, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:19.797730', 'step': 17427, 'epoch': 3} {'type': 'loss', 'content': 0.15878726541996002, 'timestamp': '2025-10-01 04:41:19.803191', 'step': 17428, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:19.856161', 'step': 17428, 'epoch': 3} {'type': 'loss', 'content': 0.03970048949122429, 'timestamp': '2025-10-01 04:41:19.858126', 'step': 17429, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:19.911249', 'step': 17429, 'epoch': 3} {'type': 'loss', 'content': 0.05627080798149109, 'timestamp': '2025-10-01 04:41:19.913706', 'step': 17430, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:19.968841', 'step': 17430, 'epoch': 3} {'type': 'loss', 'content': 0.14954155683517456, 'timestamp': '2025-10-01 04:41:19.971401', 'step': 17431, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:20.024411', 'step': 17431, 'epoch': 3} {'type': 'loss', 'content': 0.061751097440719604, 'timestamp': '2025-10-01 04:41:20.032005', 'step': 17432, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:20.084835', 'step': 17432, 'epoch': 3} {'type': 'loss', 'content': 0.16488084197044373, 'timestamp': '2025-10-01 04:41:20.087061', 'step': 17433, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:20.140684', 'step': 17433, 'epoch': 3} {'type': 'loss', 'content': 0.06542777270078659, 'timestamp': '2025-10-01 04:41:20.142705', 'step': 17434, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:20.195735', 'step': 17434, 'epoch': 3} {'type': 'loss', 'content': 0.09064838290214539, 'timestamp': '2025-10-01 04:41:20.197706', 'step': 17435, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:20.250580', 'step': 17435, 'epoch': 3} {'type': 'loss', 'content': 0.08101693540811539, 'timestamp': '2025-10-01 04:41:20.256444', 'step': 17436, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:20.309882', 'step': 17436, 'epoch': 3} {'type': 'loss', 'content': 0.0916992649435997, 'timestamp': '2025-10-01 04:41:20.312404', 'step': 17437, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:20.367127', 'step': 17437, 'epoch': 3} {'type': 'loss', 'content': 0.04572683945298195, 'timestamp': '2025-10-01 04:41:20.369584', 'step': 17438, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:20.423969', 'step': 17438, 'epoch': 3} {'type': 'loss', 'content': 0.10765859484672546, 'timestamp': '2025-10-01 04:41:20.426239', 'step': 17439, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:20.479520', 'step': 17439, 'epoch': 3} {'type': 'loss', 'content': 0.05887732282280922, 'timestamp': '2025-10-01 04:41:20.485643', 'step': 17440, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:20.538442', 'step': 17440, 'epoch': 3} {'type': 'loss', 'content': 0.1391897052526474, 'timestamp': '2025-10-01 04:41:20.540453', 'step': 17441, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:20.593674', 'step': 17441, 'epoch': 3} {'type': 'loss', 'content': 0.059484221041202545, 'timestamp': '2025-10-01 04:41:20.595766', 'step': 17442, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:20.649885', 'step': 17442, 'epoch': 3} {'type': 'loss', 'content': 0.061773452907800674, 'timestamp': '2025-10-01 04:41:20.652794', 'step': 17443, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:20.707184', 'step': 17443, 'epoch': 3} {'type': 'loss', 'content': 0.06102920323610306, 'timestamp': '2025-10-01 04:41:20.713348', 'step': 17444, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:20.772112', 'step': 17444, 'epoch': 3} {'type': 'loss', 'content': 0.10307243466377258, 'timestamp': '2025-10-01 04:41:20.779790', 'step': 17445, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:20.835595', 'step': 17445, 'epoch': 3} {'type': 'loss', 'content': 0.07814847677946091, 'timestamp': '2025-10-01 04:41:20.837807', 'step': 17446, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:20.891066', 'step': 17446, 'epoch': 3} {'type': 'loss', 'content': 0.0752762109041214, 'timestamp': '2025-10-01 04:41:20.893653', 'step': 17447, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:20.947494', 'step': 17447, 'epoch': 3} {'type': 'loss', 'content': 0.1025373786687851, 'timestamp': '2025-10-01 04:41:20.953335', 'step': 17448, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:21.006787', 'step': 17448, 'epoch': 3} {'type': 'loss', 'content': 0.11699245870113373, 'timestamp': '2025-10-01 04:41:21.009176', 'step': 17449, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:21.067482', 'step': 17449, 'epoch': 3} {'type': 'loss', 'content': 0.12087853252887726, 'timestamp': '2025-10-01 04:41:21.069844', 'step': 17450, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:21.123155', 'step': 17450, 'epoch': 3} {'type': 'loss', 'content': 0.1081804484128952, 'timestamp': '2025-10-01 04:41:21.129847', 'step': 17451, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:21.185544', 'step': 17451, 'epoch': 3} {'type': 'loss', 'content': 0.07913147658109665, 'timestamp': '2025-10-01 04:41:21.191631', 'step': 17452, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:21.245432', 'step': 17452, 'epoch': 3} {'type': 'loss', 'content': 0.0828879326581955, 'timestamp': '2025-10-01 04:41:21.250097', 'step': 17453, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:21.306311', 'step': 17453, 'epoch': 3} {'type': 'loss', 'content': 0.10987494140863419, 'timestamp': '2025-10-01 04:41:21.308566', 'step': 17454, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:21.371648', 'step': 17454, 'epoch': 3} {'type': 'loss', 'content': 0.08611159026622772, 'timestamp': '2025-10-01 04:41:21.373946', 'step': 17455, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:21.433642', 'step': 17455, 'epoch': 3} {'type': 'loss', 'content': 0.11987607926130295, 'timestamp': '2025-10-01 04:41:21.439604', 'step': 17456, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:21.492450', 'step': 17456, 'epoch': 3} {'type': 'loss', 'content': 0.0372345931828022, 'timestamp': '2025-10-01 04:41:21.494634', 'step': 17457, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:21.550367', 'step': 17457, 'epoch': 3} {'type': 'loss', 'content': 0.04405001550912857, 'timestamp': '2025-10-01 04:41:21.552785', 'step': 17458, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:21.610873', 'step': 17458, 'epoch': 3} {'type': 'loss', 'content': 0.07313288003206253, 'timestamp': '2025-10-01 04:41:21.613584', 'step': 17459, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:21.667523', 'step': 17459, 'epoch': 3} {'type': 'loss', 'content': 0.04748966917395592, 'timestamp': '2025-10-01 04:41:21.673496', 'step': 17460, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:21.730164', 'step': 17460, 'epoch': 3} {'type': 'loss', 'content': 0.12605169415473938, 'timestamp': '2025-10-01 04:41:21.732809', 'step': 17461, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:21.788111', 'step': 17461, 'epoch': 3} {'type': 'loss', 'content': 0.0610831193625927, 'timestamp': '2025-10-01 04:41:21.791893', 'step': 17462, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:21.848550', 'step': 17462, 'epoch': 3} {'type': 'loss', 'content': 0.05915481969714165, 'timestamp': '2025-10-01 04:41:21.853893', 'step': 17463, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:21.909588', 'step': 17463, 'epoch': 3} {'type': 'loss', 'content': 0.1105610728263855, 'timestamp': '2025-10-01 04:41:21.917285', 'step': 17464, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:21.979682', 'step': 17464, 'epoch': 3} {'type': 'loss', 'content': 0.0984569862484932, 'timestamp': '2025-10-01 04:41:21.981979', 'step': 17465, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:22.047996', 'step': 17465, 'epoch': 3} {'type': 'loss', 'content': 0.08902273327112198, 'timestamp': '2025-10-01 04:41:22.050374', 'step': 17466, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:22.105500', 'step': 17466, 'epoch': 3} {'type': 'loss', 'content': 0.14001496136188507, 'timestamp': '2025-10-01 04:41:22.107971', 'step': 17467, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:22.162124', 'step': 17467, 'epoch': 3} {'type': 'loss', 'content': 0.21970048546791077, 'timestamp': '2025-10-01 04:41:22.169268', 'step': 17468, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:22.223296', 'step': 17468, 'epoch': 3} {'type': 'loss', 'content': 0.08977333456277847, 'timestamp': '2025-10-01 04:41:22.225795', 'step': 17469, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:22.280022', 'step': 17469, 'epoch': 3} {'type': 'loss', 'content': 0.0577753409743309, 'timestamp': '2025-10-01 04:41:22.283080', 'step': 17470, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:22.337495', 'step': 17470, 'epoch': 3} {'type': 'loss', 'content': 0.1276991069316864, 'timestamp': '2025-10-01 04:41:22.339980', 'step': 17471, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:22.394231', 'step': 17471, 'epoch': 3} {'type': 'loss', 'content': 0.04290851205587387, 'timestamp': '2025-10-01 04:41:22.400797', 'step': 17472, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:22.454269', 'step': 17472, 'epoch': 3} {'type': 'loss', 'content': 0.14120377600193024, 'timestamp': '2025-10-01 04:41:22.456980', 'step': 17473, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:41:22.511689', 'step': 17473, 'epoch': 3} {'type': 'loss', 'content': 0.12333276122808456, 'timestamp': '2025-10-01 04:41:22.513741', 'step': 17474, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:22.570369', 'step': 17474, 'epoch': 3} {'type': 'loss', 'content': 0.09858956187963486, 'timestamp': '2025-10-01 04:41:22.572848', 'step': 17475, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:22.640595', 'step': 17475, 'epoch': 3} {'type': 'loss', 'content': 0.09696587920188904, 'timestamp': '2025-10-01 04:41:22.646853', 'step': 17476, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:22.700228', 'step': 17476, 'epoch': 3} {'type': 'loss', 'content': 0.0706406682729721, 'timestamp': '2025-10-01 04:41:22.702701', 'step': 17477, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:41:22.756309', 'step': 17477, 'epoch': 3} {'type': 'loss', 'content': 0.09228714555501938, 'timestamp': '2025-10-01 04:41:22.758820', 'step': 17478, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:22.823123', 'step': 17478, 'epoch': 3} {'type': 'loss', 'content': 0.038432527333498, 'timestamp': '2025-10-01 04:41:22.825786', 'step': 17479, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:22.887821', 'step': 17479, 'epoch': 3} {'type': 'loss', 'content': 0.07454249262809753, 'timestamp': '2025-10-01 04:41:22.894170', 'step': 17480, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:22.948394', 'step': 17480, 'epoch': 3} {'type': 'loss', 'content': 0.0476447232067585, 'timestamp': '2025-10-01 04:41:22.951056', 'step': 17481, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:41:23.005201', 'step': 17481, 'epoch': 3} {'type': 'loss', 'content': 0.013122553937137127, 'timestamp': '2025-10-01 04:41:23.007408', 'step': 17482, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:23.062422', 'step': 17482, 'epoch': 3} {'type': 'loss', 'content': 0.08373011648654938, 'timestamp': '2025-10-01 04:41:23.065105', 'step': 17483, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:41:23.119714', 'step': 17483, 'epoch': 3} {'type': 'loss', 'content': 0.010333703830838203, 'timestamp': '2025-10-01 04:41:23.125551', 'step': 17484, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:23.179599', 'step': 17484, 'epoch': 3} {'type': 'loss', 'content': 0.08513558655977249, 'timestamp': '2025-10-01 04:41:23.181830', 'step': 17485, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:23.234927', 'step': 17485, 'epoch': 3} {'type': 'loss', 'content': 0.05296604335308075, 'timestamp': '2025-10-01 04:41:23.237300', 'step': 17486, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:23.291066', 'step': 17486, 'epoch': 3} {'type': 'loss', 'content': 0.0869765430688858, 'timestamp': '2025-10-01 04:41:23.294375', 'step': 17487, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:23.347527', 'step': 17487, 'epoch': 3} {'type': 'loss', 'content': 0.1196417361497879, 'timestamp': '2025-10-01 04:41:23.353547', 'step': 17488, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:23.406141', 'step': 17488, 'epoch': 3} {'type': 'loss', 'content': 0.05238686874508858, 'timestamp': '2025-10-01 04:41:23.408428', 'step': 17489, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:23.461379', 'step': 17489, 'epoch': 3} {'type': 'loss', 'content': 0.098643958568573, 'timestamp': '2025-10-01 04:41:23.464658', 'step': 17490, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:23.518816', 'step': 17490, 'epoch': 3} {'type': 'loss', 'content': 0.10719077289104462, 'timestamp': '2025-10-01 04:41:23.521031', 'step': 17491, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:23.576537', 'step': 17491, 'epoch': 3} {'type': 'loss', 'content': 0.09128353744745255, 'timestamp': '2025-10-01 04:41:23.583688', 'step': 17492, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:23.639468', 'step': 17492, 'epoch': 3} {'type': 'loss', 'content': 0.08664881438016891, 'timestamp': '2025-10-01 04:41:23.641619', 'step': 17493, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:23.696856', 'step': 17493, 'epoch': 3} {'type': 'loss', 'content': 0.04411144554615021, 'timestamp': '2025-10-01 04:41:23.699076', 'step': 17494, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:41:23.758264', 'step': 17494, 'epoch': 3} {'type': 'loss', 'content': 0.06597684323787689, 'timestamp': '2025-10-01 04:41:23.760505', 'step': 17495, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:23.815150', 'step': 17495, 'epoch': 3} {'type': 'loss', 'content': 0.03622524067759514, 'timestamp': '2025-10-01 04:41:23.821792', 'step': 17496, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:23.875779', 'step': 17496, 'epoch': 3} {'type': 'loss', 'content': 0.04371532425284386, 'timestamp': '2025-10-01 04:41:23.878017', 'step': 17497, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:41:23.932319', 'step': 17497, 'epoch': 3} {'type': 'loss', 'content': 0.09210346639156342, 'timestamp': '2025-10-01 04:41:23.934607', 'step': 17498, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:23.988527', 'step': 17498, 'epoch': 3} {'type': 'loss', 'content': 0.09062675386667252, 'timestamp': '2025-10-01 04:41:23.990588', 'step': 17499, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:24.044619', 'step': 17499, 'epoch': 3} {'type': 'loss', 'content': 0.041975121945142746, 'timestamp': '2025-10-01 04:41:24.050976', 'step': 17500, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 17500', 'timestamp': '2025-10-01 04:41:24.568403', 'step': 17500, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:24.626149', 'step': 17500, 'epoch': 3} {'type': 'loss', 'content': 0.08571474999189377, 'timestamp': '2025-10-01 04:41:24.629241', 'step': 17501, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:24.684446', 'step': 17501, 'epoch': 3} {'type': 'loss', 'content': 0.12352115660905838, 'timestamp': '2025-10-01 04:41:24.688221', 'step': 17502, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:24.743599', 'step': 17502, 'epoch': 3} {'type': 'loss', 'content': 0.13421806693077087, 'timestamp': '2025-10-01 04:41:24.745928', 'step': 17503, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:24.800716', 'step': 17503, 'epoch': 3} {'type': 'loss', 'content': 0.10246090590953827, 'timestamp': '2025-10-01 04:41:24.807199', 'step': 17504, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:24.861711', 'step': 17504, 'epoch': 3} {'type': 'loss', 'content': 0.09784713387489319, 'timestamp': '2025-10-01 04:41:24.863870', 'step': 17505, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:24.918472', 'step': 17505, 'epoch': 3} {'type': 'loss', 'content': 0.06693634390830994, 'timestamp': '2025-10-01 04:41:24.920784', 'step': 17506, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:24.975772', 'step': 17506, 'epoch': 3} {'type': 'loss', 'content': 0.11733103543519974, 'timestamp': '2025-10-01 04:41:24.983792', 'step': 17507, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:25.043321', 'step': 17507, 'epoch': 3} {'type': 'loss', 'content': 0.08168281614780426, 'timestamp': '2025-10-01 04:41:25.055104', 'step': 17508, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:25.109967', 'step': 17508, 'epoch': 3} {'type': 'loss', 'content': 0.09442901611328125, 'timestamp': '2025-10-01 04:41:25.112291', 'step': 17509, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:25.182136', 'step': 17509, 'epoch': 3} {'type': 'loss', 'content': 0.0570453442633152, 'timestamp': '2025-10-01 04:41:25.184454', 'step': 17510, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:25.240925', 'step': 17510, 'epoch': 3} {'type': 'loss', 'content': 0.1113770455121994, 'timestamp': '2025-10-01 04:41:25.243289', 'step': 17511, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:25.307305', 'step': 17511, 'epoch': 3} {'type': 'loss', 'content': 0.02503584884107113, 'timestamp': '2025-10-01 04:41:25.314180', 'step': 17512, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:25.372244', 'step': 17512, 'epoch': 3} {'type': 'loss', 'content': 0.09310398995876312, 'timestamp': '2025-10-01 04:41:25.374542', 'step': 17513, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:25.432664', 'step': 17513, 'epoch': 3} {'type': 'loss', 'content': 0.058584731072187424, 'timestamp': '2025-10-01 04:41:25.435410', 'step': 17514, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:25.495280', 'step': 17514, 'epoch': 3} {'type': 'loss', 'content': 0.03713536262512207, 'timestamp': '2025-10-01 04:41:25.498278', 'step': 17515, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:25.558501', 'step': 17515, 'epoch': 3} {'type': 'loss', 'content': 0.08529367297887802, 'timestamp': '2025-10-01 04:41:25.565801', 'step': 17516, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:25.624084', 'step': 17516, 'epoch': 3} {'type': 'loss', 'content': 0.023347478359937668, 'timestamp': '2025-10-01 04:41:25.628143', 'step': 17517, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:25.685370', 'step': 17517, 'epoch': 3} {'type': 'loss', 'content': 0.05637630820274353, 'timestamp': '2025-10-01 04:41:25.687608', 'step': 17518, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:25.746126', 'step': 17518, 'epoch': 3} {'type': 'loss', 'content': 0.08098402619361877, 'timestamp': '2025-10-01 04:41:25.748560', 'step': 17519, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:25.806315', 'step': 17519, 'epoch': 3} {'type': 'loss', 'content': 0.07264506071805954, 'timestamp': '2025-10-01 04:41:25.813025', 'step': 17520, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:25.870177', 'step': 17520, 'epoch': 3} {'type': 'loss', 'content': 0.07390136271715164, 'timestamp': '2025-10-01 04:41:25.873236', 'step': 17521, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:25.933695', 'step': 17521, 'epoch': 3} {'type': 'loss', 'content': 0.04856889322400093, 'timestamp': '2025-10-01 04:41:25.935943', 'step': 17522, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:25.989754', 'step': 17522, 'epoch': 3} {'type': 'loss', 'content': 0.07571716606616974, 'timestamp': '2025-10-01 04:41:25.991945', 'step': 17523, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:26.045575', 'step': 17523, 'epoch': 3} {'type': 'loss', 'content': 0.06806579977273941, 'timestamp': '2025-10-01 04:41:26.051777', 'step': 17524, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:41:26.104745', 'step': 17524, 'epoch': 3} {'type': 'loss', 'content': 0.06736566871404648, 'timestamp': '2025-10-01 04:41:26.106910', 'step': 17525, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:26.160382', 'step': 17525, 'epoch': 3} {'type': 'loss', 'content': 0.13780486583709717, 'timestamp': '2025-10-01 04:41:26.162597', 'step': 17526, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:26.216809', 'step': 17526, 'epoch': 3} {'type': 'loss', 'content': 0.04107328876852989, 'timestamp': '2025-10-01 04:41:26.219251', 'step': 17527, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:26.273800', 'step': 17527, 'epoch': 3} {'type': 'loss', 'content': 0.05863248556852341, 'timestamp': '2025-10-01 04:41:26.279877', 'step': 17528, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:26.332610', 'step': 17528, 'epoch': 3} {'type': 'loss', 'content': 0.13401998579502106, 'timestamp': '2025-10-01 04:41:26.335259', 'step': 17529, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:26.389430', 'step': 17529, 'epoch': 3} {'type': 'loss', 'content': 0.13292670249938965, 'timestamp': '2025-10-01 04:41:26.391775', 'step': 17530, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:26.446531', 'step': 17530, 'epoch': 3} {'type': 'loss', 'content': 0.05716126784682274, 'timestamp': '2025-10-01 04:41:26.448736', 'step': 17531, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:26.502951', 'step': 17531, 'epoch': 3} {'type': 'loss', 'content': 0.06384629011154175, 'timestamp': '2025-10-01 04:41:26.509187', 'step': 17532, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:26.565917', 'step': 17532, 'epoch': 3} {'type': 'loss', 'content': 0.06388144940137863, 'timestamp': '2025-10-01 04:41:26.568099', 'step': 17533, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:26.621275', 'step': 17533, 'epoch': 3} {'type': 'loss', 'content': 0.09344474226236343, 'timestamp': '2025-10-01 04:41:26.623469', 'step': 17534, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:26.676654', 'step': 17534, 'epoch': 3} {'type': 'loss', 'content': 0.04175097495317459, 'timestamp': '2025-10-01 04:41:26.678900', 'step': 17535, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:26.738400', 'step': 17535, 'epoch': 3} {'type': 'loss', 'content': 0.07999492436647415, 'timestamp': '2025-10-01 04:41:26.744485', 'step': 17536, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:26.797346', 'step': 17536, 'epoch': 3} {'type': 'loss', 'content': 0.2150505781173706, 'timestamp': '2025-10-01 04:41:26.799673', 'step': 17537, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:26.852599', 'step': 17537, 'epoch': 3} {'type': 'loss', 'content': 0.04203873127698898, 'timestamp': '2025-10-01 04:41:26.854881', 'step': 17538, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:26.908111', 'step': 17538, 'epoch': 3} {'type': 'loss', 'content': 0.14732421934604645, 'timestamp': '2025-10-01 04:41:26.910234', 'step': 17539, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:41:26.963242', 'step': 17539, 'epoch': 3} {'type': 'loss', 'content': 0.07810721546411514, 'timestamp': '2025-10-01 04:41:26.969121', 'step': 17540, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:27.021929', 'step': 17540, 'epoch': 3} {'type': 'loss', 'content': 0.049583375453948975, 'timestamp': '2025-10-01 04:41:27.024273', 'step': 17541, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:27.076972', 'step': 17541, 'epoch': 3} {'type': 'loss', 'content': 0.05165588855743408, 'timestamp': '2025-10-01 04:41:27.079154', 'step': 17542, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:27.132438', 'step': 17542, 'epoch': 3} {'type': 'loss', 'content': 0.10194128006696701, 'timestamp': '2025-10-01 04:41:27.134890', 'step': 17543, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:27.193030', 'step': 17543, 'epoch': 3} {'type': 'loss', 'content': 0.16758090257644653, 'timestamp': '2025-10-01 04:41:27.198953', 'step': 17544, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:27.259330', 'step': 17544, 'epoch': 3} {'type': 'loss', 'content': 0.07005319744348526, 'timestamp': '2025-10-01 04:41:27.261614', 'step': 17545, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:27.314542', 'step': 17545, 'epoch': 3} {'type': 'loss', 'content': 0.0776812955737114, 'timestamp': '2025-10-01 04:41:27.316839', 'step': 17546, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:27.369648', 'step': 17546, 'epoch': 3} {'type': 'loss', 'content': 0.08841759711503983, 'timestamp': '2025-10-01 04:41:27.371898', 'step': 17547, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:27.425567', 'step': 17547, 'epoch': 3} {'type': 'loss', 'content': 0.04242114722728729, 'timestamp': '2025-10-01 04:41:27.431426', 'step': 17548, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:41:27.483942', 'step': 17548, 'epoch': 3} {'type': 'loss', 'content': 0.09440987557172775, 'timestamp': '2025-10-01 04:41:27.486324', 'step': 17549, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:27.540953', 'step': 17549, 'epoch': 3} {'type': 'loss', 'content': 0.04626094177365303, 'timestamp': '2025-10-01 04:41:27.549081', 'step': 17550, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:27.602568', 'step': 17550, 'epoch': 3} {'type': 'loss', 'content': 0.09858791530132294, 'timestamp': '2025-10-01 04:41:27.604732', 'step': 17551, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:27.657540', 'step': 17551, 'epoch': 3} {'type': 'loss', 'content': 0.06868249922990799, 'timestamp': '2025-10-01 04:41:27.663402', 'step': 17552, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:27.716429', 'step': 17552, 'epoch': 3} {'type': 'loss', 'content': 0.07315844297409058, 'timestamp': '2025-10-01 04:41:27.720377', 'step': 17553, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:27.775659', 'step': 17553, 'epoch': 3} {'type': 'loss', 'content': 0.10283838212490082, 'timestamp': '2025-10-01 04:41:27.777924', 'step': 17554, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:27.831588', 'step': 17554, 'epoch': 3} {'type': 'loss', 'content': 0.08977583050727844, 'timestamp': '2025-10-01 04:41:27.833974', 'step': 17555, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:27.887872', 'step': 17555, 'epoch': 3} {'type': 'loss', 'content': 0.07533830404281616, 'timestamp': '2025-10-01 04:41:27.893639', 'step': 17556, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:27.946295', 'step': 17556, 'epoch': 3} {'type': 'loss', 'content': 0.11447618901729584, 'timestamp': '2025-10-01 04:41:27.955625', 'step': 17557, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:28.008938', 'step': 17557, 'epoch': 3} {'type': 'loss', 'content': 0.07098537683486938, 'timestamp': '2025-10-01 04:41:28.011335', 'step': 17558, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:28.064647', 'step': 17558, 'epoch': 3} {'type': 'loss', 'content': 0.09717751294374466, 'timestamp': '2025-10-01 04:41:28.067669', 'step': 17559, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:28.121539', 'step': 17559, 'epoch': 3} {'type': 'loss', 'content': 0.10801887512207031, 'timestamp': '2025-10-01 04:41:28.127475', 'step': 17560, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:28.181731', 'step': 17560, 'epoch': 3} {'type': 'loss', 'content': 0.1474519968032837, 'timestamp': '2025-10-01 04:41:28.184029', 'step': 17561, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:28.236942', 'step': 17561, 'epoch': 3} {'type': 'loss', 'content': 0.08947234600782394, 'timestamp': '2025-10-01 04:41:28.239217', 'step': 17562, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:28.294434', 'step': 17562, 'epoch': 3} {'type': 'loss', 'content': 0.07460468262434006, 'timestamp': '2025-10-01 04:41:28.296682', 'step': 17563, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:28.349934', 'step': 17563, 'epoch': 3} {'type': 'loss', 'content': 0.05256705731153488, 'timestamp': '2025-10-01 04:41:28.357339', 'step': 17564, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:28.411045', 'step': 17564, 'epoch': 3} {'type': 'loss', 'content': 0.06496849656105042, 'timestamp': '2025-10-01 04:41:28.413465', 'step': 17565, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:28.467087', 'step': 17565, 'epoch': 3} {'type': 'loss', 'content': 0.06736055016517639, 'timestamp': '2025-10-01 04:41:28.469855', 'step': 17566, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:28.523654', 'step': 17566, 'epoch': 3} {'type': 'loss', 'content': 0.08501038700342178, 'timestamp': '2025-10-01 04:41:28.526170', 'step': 17567, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:28.580008', 'step': 17567, 'epoch': 3} {'type': 'loss', 'content': 0.031152639538049698, 'timestamp': '2025-10-01 04:41:28.585850', 'step': 17568, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:28.638482', 'step': 17568, 'epoch': 3} {'type': 'loss', 'content': 0.0820079818367958, 'timestamp': '2025-10-01 04:41:28.640696', 'step': 17569, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:28.692974', 'step': 17569, 'epoch': 3} {'type': 'loss', 'content': 0.12237206846475601, 'timestamp': '2025-10-01 04:41:28.695196', 'step': 17570, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:28.749565', 'step': 17570, 'epoch': 3} {'type': 'loss', 'content': 0.10028520971536636, 'timestamp': '2025-10-01 04:41:28.751831', 'step': 17571, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:28.804632', 'step': 17571, 'epoch': 3} {'type': 'loss', 'content': 0.030109047889709473, 'timestamp': '2025-10-01 04:41:28.810556', 'step': 17572, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:28.863469', 'step': 17572, 'epoch': 3} {'type': 'loss', 'content': 0.09400680661201477, 'timestamp': '2025-10-01 04:41:28.865938', 'step': 17573, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:28.919390', 'step': 17573, 'epoch': 3} {'type': 'loss', 'content': 0.012757455930113792, 'timestamp': '2025-10-01 04:41:28.921628', 'step': 17574, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:28.974663', 'step': 17574, 'epoch': 3} {'type': 'loss', 'content': 0.06648034602403641, 'timestamp': '2025-10-01 04:41:28.976712', 'step': 17575, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:29.030058', 'step': 17575, 'epoch': 3} {'type': 'loss', 'content': 0.06718829274177551, 'timestamp': '2025-10-01 04:41:29.035958', 'step': 17576, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:41:29.088642', 'step': 17576, 'epoch': 3} {'type': 'loss', 'content': 0.08010764420032501, 'timestamp': '2025-10-01 04:41:29.090808', 'step': 17577, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:29.145085', 'step': 17577, 'epoch': 3} {'type': 'loss', 'content': 0.03024432249367237, 'timestamp': '2025-10-01 04:41:29.147602', 'step': 17578, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:29.202068', 'step': 17578, 'epoch': 3} {'type': 'loss', 'content': 0.01254792045801878, 'timestamp': '2025-10-01 04:41:29.204428', 'step': 17579, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:29.257608', 'step': 17579, 'epoch': 3} {'type': 'loss', 'content': 0.15012137591838837, 'timestamp': '2025-10-01 04:41:29.264065', 'step': 17580, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:29.326724', 'step': 17580, 'epoch': 3} {'type': 'loss', 'content': 0.11155513674020767, 'timestamp': '2025-10-01 04:41:29.338069', 'step': 17581, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:29.390853', 'step': 17581, 'epoch': 3} {'type': 'loss', 'content': 0.05628962442278862, 'timestamp': '2025-10-01 04:41:29.392952', 'step': 17582, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:29.446182', 'step': 17582, 'epoch': 3} {'type': 'loss', 'content': 0.08121202886104584, 'timestamp': '2025-10-01 04:41:29.448319', 'step': 17583, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:29.502034', 'step': 17583, 'epoch': 3} {'type': 'loss', 'content': 0.0786471962928772, 'timestamp': '2025-10-01 04:41:29.507874', 'step': 17584, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:29.562444', 'step': 17584, 'epoch': 3} {'type': 'loss', 'content': 0.10496310144662857, 'timestamp': '2025-10-01 04:41:29.564626', 'step': 17585, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:29.617823', 'step': 17585, 'epoch': 3} {'type': 'loss', 'content': 0.0739634707570076, 'timestamp': '2025-10-01 04:41:29.620188', 'step': 17586, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:29.673544', 'step': 17586, 'epoch': 3} {'type': 'loss', 'content': 0.16483478248119354, 'timestamp': '2025-10-01 04:41:29.675985', 'step': 17587, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:29.729112', 'step': 17587, 'epoch': 3} {'type': 'loss', 'content': 0.0667203813791275, 'timestamp': '2025-10-01 04:41:29.735087', 'step': 17588, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:29.787501', 'step': 17588, 'epoch': 3} {'type': 'loss', 'content': 0.1071939617395401, 'timestamp': '2025-10-01 04:41:29.789722', 'step': 17589, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:29.842596', 'step': 17589, 'epoch': 3} {'type': 'loss', 'content': 0.10402566939592361, 'timestamp': '2025-10-01 04:41:29.844820', 'step': 17590, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:29.900047', 'step': 17590, 'epoch': 3} {'type': 'loss', 'content': 0.07133239507675171, 'timestamp': '2025-10-01 04:41:29.902235', 'step': 17591, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:29.955531', 'step': 17591, 'epoch': 3} {'type': 'loss', 'content': 0.07056381553411484, 'timestamp': '2025-10-01 04:41:29.961256', 'step': 17592, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:30.022877', 'step': 17592, 'epoch': 3} {'type': 'loss', 'content': 0.07861325889825821, 'timestamp': '2025-10-01 04:41:30.025075', 'step': 17593, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:30.079637', 'step': 17593, 'epoch': 3} {'type': 'loss', 'content': 0.04450634866952896, 'timestamp': '2025-10-01 04:41:30.081780', 'step': 17594, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:30.135074', 'step': 17594, 'epoch': 3} {'type': 'loss', 'content': 0.06885132193565369, 'timestamp': '2025-10-01 04:41:30.137277', 'step': 17595, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:41:30.190665', 'step': 17595, 'epoch': 3} {'type': 'loss', 'content': 0.04216032102704048, 'timestamp': '2025-10-01 04:41:30.196775', 'step': 17596, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:30.249375', 'step': 17596, 'epoch': 3} {'type': 'loss', 'content': 0.08974478393793106, 'timestamp': '2025-10-01 04:41:30.251560', 'step': 17597, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:30.304851', 'step': 17597, 'epoch': 3} {'type': 'loss', 'content': 0.07217369228601456, 'timestamp': '2025-10-01 04:41:30.307142', 'step': 17598, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:30.360364', 'step': 17598, 'epoch': 3} {'type': 'loss', 'content': 0.048559777438640594, 'timestamp': '2025-10-01 04:41:30.362983', 'step': 17599, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:30.417481', 'step': 17599, 'epoch': 3} {'type': 'loss', 'content': 0.12798671424388885, 'timestamp': '2025-10-01 04:41:30.423181', 'step': 17600, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:30.476063', 'step': 17600, 'epoch': 3} {'type': 'loss', 'content': 0.04790952801704407, 'timestamp': '2025-10-01 04:41:30.478492', 'step': 17601, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:30.531688', 'step': 17601, 'epoch': 3} {'type': 'loss', 'content': 0.08525629341602325, 'timestamp': '2025-10-01 04:41:30.534376', 'step': 17602, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:30.588114', 'step': 17602, 'epoch': 3} {'type': 'loss', 'content': 0.09644145518541336, 'timestamp': '2025-10-01 04:41:30.590305', 'step': 17603, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:41:30.645531', 'step': 17603, 'epoch': 3} {'type': 'loss', 'content': 0.0717683881521225, 'timestamp': '2025-10-01 04:41:30.651590', 'step': 17604, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:30.706368', 'step': 17604, 'epoch': 3} {'type': 'loss', 'content': 0.03209938853979111, 'timestamp': '2025-10-01 04:41:30.708662', 'step': 17605, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:30.762489', 'step': 17605, 'epoch': 3} {'type': 'loss', 'content': 0.06320073455572128, 'timestamp': '2025-10-01 04:41:30.765584', 'step': 17606, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:30.819236', 'step': 17606, 'epoch': 3} {'type': 'loss', 'content': 0.07028313726186752, 'timestamp': '2025-10-01 04:41:30.821669', 'step': 17607, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:30.875637', 'step': 17607, 'epoch': 3} {'type': 'loss', 'content': 0.1071823462843895, 'timestamp': '2025-10-01 04:41:30.881631', 'step': 17608, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:30.934265', 'step': 17608, 'epoch': 3} {'type': 'loss', 'content': 0.061262425035238266, 'timestamp': '2025-10-01 04:41:30.937060', 'step': 17609, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:30.990240', 'step': 17609, 'epoch': 3} {'type': 'loss', 'content': 0.12373609095811844, 'timestamp': '2025-10-01 04:41:30.992439', 'step': 17610, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:31.046279', 'step': 17610, 'epoch': 3} {'type': 'loss', 'content': 0.12124033272266388, 'timestamp': '2025-10-01 04:41:31.049059', 'step': 17611, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:31.102358', 'step': 17611, 'epoch': 3} {'type': 'loss', 'content': 0.16298827528953552, 'timestamp': '2025-10-01 04:41:31.108293', 'step': 17612, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:31.161384', 'step': 17612, 'epoch': 3} {'type': 'loss', 'content': 0.05602320283651352, 'timestamp': '2025-10-01 04:41:31.163750', 'step': 17613, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:31.217221', 'step': 17613, 'epoch': 3} {'type': 'loss', 'content': 0.07709448039531708, 'timestamp': '2025-10-01 04:41:31.221193', 'step': 17614, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:31.277204', 'step': 17614, 'epoch': 3} {'type': 'loss', 'content': 0.13127537071704865, 'timestamp': '2025-10-01 04:41:31.279521', 'step': 17615, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:31.332993', 'step': 17615, 'epoch': 3} {'type': 'loss', 'content': 0.07103462517261505, 'timestamp': '2025-10-01 04:41:31.338973', 'step': 17616, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:31.391639', 'step': 17616, 'epoch': 3} {'type': 'loss', 'content': 0.06936831772327423, 'timestamp': '2025-10-01 04:41:31.393900', 'step': 17617, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:31.447131', 'step': 17617, 'epoch': 3} {'type': 'loss', 'content': 0.10651187598705292, 'timestamp': '2025-10-01 04:41:31.449397', 'step': 17618, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:41:31.502799', 'step': 17618, 'epoch': 3} {'type': 'loss', 'content': 0.1371636539697647, 'timestamp': '2025-10-01 04:41:31.515327', 'step': 17619, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:31.575674', 'step': 17619, 'epoch': 3} {'type': 'loss', 'content': 0.056696537882089615, 'timestamp': '2025-10-01 04:41:31.581394', 'step': 17620, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:31.634280', 'step': 17620, 'epoch': 3} {'type': 'loss', 'content': 0.02690444141626358, 'timestamp': '2025-10-01 04:41:31.636997', 'step': 17621, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:31.690642', 'step': 17621, 'epoch': 3} {'type': 'loss', 'content': 0.08862762153148651, 'timestamp': '2025-10-01 04:41:31.693078', 'step': 17622, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:31.747662', 'step': 17622, 'epoch': 3} {'type': 'loss', 'content': 0.1475847065448761, 'timestamp': '2025-10-01 04:41:31.749933', 'step': 17623, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:31.802603', 'step': 17623, 'epoch': 3} {'type': 'loss', 'content': 0.08245901763439178, 'timestamp': '2025-10-01 04:41:31.808479', 'step': 17624, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:31.860914', 'step': 17624, 'epoch': 3} {'type': 'loss', 'content': 0.1473088562488556, 'timestamp': '2025-10-01 04:41:31.864899', 'step': 17625, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:31.918067', 'step': 17625, 'epoch': 3} {'type': 'loss', 'content': 0.11309236288070679, 'timestamp': '2025-10-01 04:41:31.920442', 'step': 17626, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:31.974304', 'step': 17626, 'epoch': 3} {'type': 'loss', 'content': 0.16856859624385834, 'timestamp': '2025-10-01 04:41:31.977226', 'step': 17627, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:41:32.030927', 'step': 17627, 'epoch': 3} {'type': 'loss', 'content': 0.10009647160768509, 'timestamp': '2025-10-01 04:41:32.050790', 'step': 17628, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:32.103691', 'step': 17628, 'epoch': 3} {'type': 'loss', 'content': 0.08210406452417374, 'timestamp': '2025-10-01 04:41:32.105971', 'step': 17629, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:32.160252', 'step': 17629, 'epoch': 3} {'type': 'loss', 'content': 0.15559488534927368, 'timestamp': '2025-10-01 04:41:32.162628', 'step': 17630, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:32.217009', 'step': 17630, 'epoch': 3} {'type': 'loss', 'content': 0.11377100646495819, 'timestamp': '2025-10-01 04:41:32.219440', 'step': 17631, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:32.273275', 'step': 17631, 'epoch': 3} {'type': 'loss', 'content': 0.14254824817180634, 'timestamp': '2025-10-01 04:41:32.279239', 'step': 17632, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:32.332540', 'step': 17632, 'epoch': 3} {'type': 'loss', 'content': 0.15038177371025085, 'timestamp': '2025-10-01 04:41:32.334942', 'step': 17633, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:32.388233', 'step': 17633, 'epoch': 3} {'type': 'loss', 'content': 0.05846954882144928, 'timestamp': '2025-10-01 04:41:32.391062', 'step': 17634, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:32.445253', 'step': 17634, 'epoch': 3} {'type': 'loss', 'content': 0.12815387547016144, 'timestamp': '2025-10-01 04:41:32.447543', 'step': 17635, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:32.501337', 'step': 17635, 'epoch': 3} {'type': 'loss', 'content': 0.10309910774230957, 'timestamp': '2025-10-01 04:41:32.507336', 'step': 17636, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:32.560781', 'step': 17636, 'epoch': 3} {'type': 'loss', 'content': 0.07854931056499481, 'timestamp': '2025-10-01 04:41:32.562951', 'step': 17637, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:32.616691', 'step': 17637, 'epoch': 3} {'type': 'loss', 'content': 0.09899210929870605, 'timestamp': '2025-10-01 04:41:32.619380', 'step': 17638, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:32.673649', 'step': 17638, 'epoch': 3} {'type': 'loss', 'content': 0.05522022396326065, 'timestamp': '2025-10-01 04:41:32.679744', 'step': 17639, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:32.733044', 'step': 17639, 'epoch': 3} {'type': 'loss', 'content': 0.18055911362171173, 'timestamp': '2025-10-01 04:41:32.738931', 'step': 17640, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:41:32.791947', 'step': 17640, 'epoch': 3} {'type': 'loss', 'content': 0.14234867691993713, 'timestamp': '2025-10-01 04:41:32.794222', 'step': 17641, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:32.848239', 'step': 17641, 'epoch': 3} {'type': 'loss', 'content': 0.10536912083625793, 'timestamp': '2025-10-01 04:41:32.850379', 'step': 17642, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:32.903911', 'step': 17642, 'epoch': 3} {'type': 'loss', 'content': 0.10207489877939224, 'timestamp': '2025-10-01 04:41:32.906156', 'step': 17643, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:32.960672', 'step': 17643, 'epoch': 3} {'type': 'loss', 'content': 0.08756851404905319, 'timestamp': '2025-10-01 04:41:32.966704', 'step': 17644, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:33.020058', 'step': 17644, 'epoch': 3} {'type': 'loss', 'content': 0.1035393625497818, 'timestamp': '2025-10-01 04:41:33.022472', 'step': 17645, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:33.082847', 'step': 17645, 'epoch': 3} {'type': 'loss', 'content': 0.04868277162313461, 'timestamp': '2025-10-01 04:41:33.085561', 'step': 17646, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:33.139265', 'step': 17646, 'epoch': 3} {'type': 'loss', 'content': 0.12333697825670242, 'timestamp': '2025-10-01 04:41:33.141598', 'step': 17647, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:33.194929', 'step': 17647, 'epoch': 3} {'type': 'loss', 'content': 0.0981164500117302, 'timestamp': '2025-10-01 04:41:33.200983', 'step': 17648, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:33.254195', 'step': 17648, 'epoch': 3} {'type': 'loss', 'content': 0.07174862921237946, 'timestamp': '2025-10-01 04:41:33.256494', 'step': 17649, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:33.316406', 'step': 17649, 'epoch': 3} {'type': 'loss', 'content': 0.060901083052158356, 'timestamp': '2025-10-01 04:41:33.318633', 'step': 17650, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:33.372608', 'step': 17650, 'epoch': 3} {'type': 'loss', 'content': 0.22469621896743774, 'timestamp': '2025-10-01 04:41:33.375102', 'step': 17651, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:33.429241', 'step': 17651, 'epoch': 3} {'type': 'loss', 'content': 0.11503413319587708, 'timestamp': '2025-10-01 04:41:33.434895', 'step': 17652, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:33.488005', 'step': 17652, 'epoch': 3} {'type': 'loss', 'content': 0.08512271195650101, 'timestamp': '2025-10-01 04:41:33.490094', 'step': 17653, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:33.546687', 'step': 17653, 'epoch': 3} {'type': 'loss', 'content': 0.09529785811901093, 'timestamp': '2025-10-01 04:41:33.548893', 'step': 17654, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:33.603601', 'step': 17654, 'epoch': 3} {'type': 'loss', 'content': 0.2051045000553131, 'timestamp': '2025-10-01 04:41:33.606020', 'step': 17655, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:33.659826', 'step': 17655, 'epoch': 3} {'type': 'loss', 'content': 0.11508002877235413, 'timestamp': '2025-10-01 04:41:33.665864', 'step': 17656, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:41:33.719051', 'step': 17656, 'epoch': 3} {'type': 'loss', 'content': 0.04495205357670784, 'timestamp': '2025-10-01 04:41:33.721784', 'step': 17657, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:33.787331', 'step': 17657, 'epoch': 3} {'type': 'loss', 'content': 0.12847308814525604, 'timestamp': '2025-10-01 04:41:33.789827', 'step': 17658, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:33.844388', 'step': 17658, 'epoch': 3} {'type': 'loss', 'content': 0.0654262825846672, 'timestamp': '2025-10-01 04:41:33.859754', 'step': 17659, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:33.928848', 'step': 17659, 'epoch': 3} {'type': 'loss', 'content': 0.10306798666715622, 'timestamp': '2025-10-01 04:41:33.934679', 'step': 17660, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:33.987668', 'step': 17660, 'epoch': 3} {'type': 'loss', 'content': 0.11830563098192215, 'timestamp': '2025-10-01 04:41:33.989598', 'step': 17661, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:34.043944', 'step': 17661, 'epoch': 3} {'type': 'loss', 'content': 0.033068303018808365, 'timestamp': '2025-10-01 04:41:34.046400', 'step': 17662, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:34.100899', 'step': 17662, 'epoch': 3} {'type': 'loss', 'content': 0.10025793313980103, 'timestamp': '2025-10-01 04:41:34.103243', 'step': 17663, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:41:34.156740', 'step': 17663, 'epoch': 3} {'type': 'loss', 'content': 0.0754997506737709, 'timestamp': '2025-10-01 04:41:34.172066', 'step': 17664, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:34.238059', 'step': 17664, 'epoch': 3} {'type': 'loss', 'content': 0.1193201094865799, 'timestamp': '2025-10-01 04:41:34.240719', 'step': 17665, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:34.305470', 'step': 17665, 'epoch': 3} {'type': 'loss', 'content': 0.06788083165884018, 'timestamp': '2025-10-01 04:41:34.307641', 'step': 17666, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:34.361066', 'step': 17666, 'epoch': 3} {'type': 'loss', 'content': 0.07056625932455063, 'timestamp': '2025-10-01 04:41:34.363521', 'step': 17667, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:34.417140', 'step': 17667, 'epoch': 3} {'type': 'loss', 'content': 0.10723837465047836, 'timestamp': '2025-10-01 04:41:34.423423', 'step': 17668, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-01 04:41:34.476632', 'step': 17668, 'epoch': 3} {'type': 'loss', 'content': 0.09917476773262024, 'timestamp': '2025-10-01 04:41:34.479145', 'step': 17669, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:34.532376', 'step': 17669, 'epoch': 3} {'type': 'loss', 'content': 0.0696268230676651, 'timestamp': '2025-10-01 04:41:34.534536', 'step': 17670, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:34.588280', 'step': 17670, 'epoch': 3} {'type': 'loss', 'content': 0.049412138760089874, 'timestamp': '2025-10-01 04:41:34.590545', 'step': 17671, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:34.643649', 'step': 17671, 'epoch': 3} {'type': 'loss', 'content': 0.04107065871357918, 'timestamp': '2025-10-01 04:41:34.649880', 'step': 17672, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:41:34.702776', 'step': 17672, 'epoch': 3} {'type': 'loss', 'content': 0.04630744457244873, 'timestamp': '2025-10-01 04:41:34.714595', 'step': 17673, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:34.768330', 'step': 17673, 'epoch': 3} {'type': 'loss', 'content': 0.17528018355369568, 'timestamp': '2025-10-01 04:41:34.771092', 'step': 17674, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:34.824441', 'step': 17674, 'epoch': 3} {'type': 'loss', 'content': 0.13963191211223602, 'timestamp': '2025-10-01 04:41:34.826776', 'step': 17675, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:34.880813', 'step': 17675, 'epoch': 3} {'type': 'loss', 'content': 0.06916883587837219, 'timestamp': '2025-10-01 04:41:34.886769', 'step': 17676, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:34.940083', 'step': 17676, 'epoch': 3} {'type': 'loss', 'content': 0.0711708664894104, 'timestamp': '2025-10-01 04:41:34.942912', 'step': 17677, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:34.999620', 'step': 17677, 'epoch': 3} {'type': 'loss', 'content': 0.13896165788173676, 'timestamp': '2025-10-01 04:41:35.001814', 'step': 17678, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:35.055363', 'step': 17678, 'epoch': 3} {'type': 'loss', 'content': 0.0784529522061348, 'timestamp': '2025-10-01 04:41:35.057646', 'step': 17679, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:35.110726', 'step': 17679, 'epoch': 3} {'type': 'loss', 'content': 0.05765834078192711, 'timestamp': '2025-10-01 04:41:35.116604', 'step': 17680, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:35.169723', 'step': 17680, 'epoch': 3} {'type': 'loss', 'content': 0.15111716091632843, 'timestamp': '2025-10-01 04:41:35.173661', 'step': 17681, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:35.227056', 'step': 17681, 'epoch': 3} {'type': 'loss', 'content': 0.07584653049707413, 'timestamp': '2025-10-01 04:41:35.229393', 'step': 17682, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:41:35.283135', 'step': 17682, 'epoch': 3} {'type': 'loss', 'content': 0.06448289006948471, 'timestamp': '2025-10-01 04:41:35.285378', 'step': 17683, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:35.338802', 'step': 17683, 'epoch': 3} {'type': 'loss', 'content': 0.11828663945198059, 'timestamp': '2025-10-01 04:41:35.344875', 'step': 17684, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:35.397811', 'step': 17684, 'epoch': 3} {'type': 'loss', 'content': 0.1179991215467453, 'timestamp': '2025-10-01 04:41:35.400059', 'step': 17685, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:41:35.453273', 'step': 17685, 'epoch': 3} {'type': 'loss', 'content': 0.10515181720256805, 'timestamp': '2025-10-01 04:41:35.455657', 'step': 17686, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:41:35.509215', 'step': 17686, 'epoch': 3} {'type': 'loss', 'content': 0.1273338943719864, 'timestamp': '2025-10-01 04:41:35.511633', 'step': 17687, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:35.564923', 'step': 17687, 'epoch': 3} {'type': 'loss', 'content': 0.04892144352197647, 'timestamp': '2025-10-01 04:41:35.571025', 'step': 17688, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:35.624104', 'step': 17688, 'epoch': 3} {'type': 'loss', 'content': 0.14429597556591034, 'timestamp': '2025-10-01 04:41:35.626378', 'step': 17689, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:35.679659', 'step': 17689, 'epoch': 3} {'type': 'loss', 'content': 0.15549857914447784, 'timestamp': '2025-10-01 04:41:35.681833', 'step': 17690, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:35.735876', 'step': 17690, 'epoch': 3} {'type': 'loss', 'content': 0.05782870948314667, 'timestamp': '2025-10-01 04:41:35.739385', 'step': 17691, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:35.802663', 'step': 17691, 'epoch': 3} {'type': 'loss', 'content': 0.16224302351474762, 'timestamp': '2025-10-01 04:41:35.808762', 'step': 17692, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:35.862819', 'step': 17692, 'epoch': 3} {'type': 'loss', 'content': 0.04310920089483261, 'timestamp': '2025-10-01 04:41:35.865151', 'step': 17693, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:35.918900', 'step': 17693, 'epoch': 3} {'type': 'loss', 'content': 0.12435910105705261, 'timestamp': '2025-10-01 04:41:35.921373', 'step': 17694, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:41:35.974949', 'step': 17694, 'epoch': 3} {'type': 'loss', 'content': 0.11177622526884079, 'timestamp': '2025-10-01 04:41:35.977362', 'step': 17695, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:36.030732', 'step': 17695, 'epoch': 3} {'type': 'loss', 'content': 0.06949258595705032, 'timestamp': '2025-10-01 04:41:36.036597', 'step': 17696, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:36.089778', 'step': 17696, 'epoch': 3} {'type': 'loss', 'content': 0.0705852136015892, 'timestamp': '2025-10-01 04:41:36.092215', 'step': 17697, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:36.145939', 'step': 17697, 'epoch': 3} {'type': 'loss', 'content': 0.11699775606393814, 'timestamp': '2025-10-01 04:41:36.149144', 'step': 17698, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:36.203412', 'step': 17698, 'epoch': 3} {'type': 'loss', 'content': 0.0706169605255127, 'timestamp': '2025-10-01 04:41:36.205928', 'step': 17699, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:36.264972', 'step': 17699, 'epoch': 3} {'type': 'loss', 'content': 0.13700532913208008, 'timestamp': '2025-10-01 04:41:36.273072', 'step': 17700, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:36.328366', 'step': 17700, 'epoch': 3} {'type': 'loss', 'content': 0.08349752426147461, 'timestamp': '2025-10-01 04:41:36.333381', 'step': 17701, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:36.396875', 'step': 17701, 'epoch': 3} {'type': 'loss', 'content': 0.12486216425895691, 'timestamp': '2025-10-01 04:41:36.399544', 'step': 17702, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:36.453763', 'step': 17702, 'epoch': 3} {'type': 'loss', 'content': 0.11935856938362122, 'timestamp': '2025-10-01 04:41:36.456205', 'step': 17703, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:36.509930', 'step': 17703, 'epoch': 3} {'type': 'loss', 'content': 0.10417631268501282, 'timestamp': '2025-10-01 04:41:36.516183', 'step': 17704, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:36.569363', 'step': 17704, 'epoch': 3} {'type': 'loss', 'content': 0.026901116594672203, 'timestamp': '2025-10-01 04:41:36.574549', 'step': 17705, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:36.627813', 'step': 17705, 'epoch': 3} {'type': 'loss', 'content': 0.1873425841331482, 'timestamp': '2025-10-01 04:41:36.630105', 'step': 17706, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:36.683594', 'step': 17706, 'epoch': 3} {'type': 'loss', 'content': 0.12428999692201614, 'timestamp': '2025-10-01 04:41:36.685862', 'step': 17707, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:36.739076', 'step': 17707, 'epoch': 3} {'type': 'loss', 'content': 0.07792389392852783, 'timestamp': '2025-10-01 04:41:36.745307', 'step': 17708, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-01 04:41:50.218495', 'step': 17708, 'epoch': 3} {'type': 'pplx', 'content': 10632.599617934147, 'timestamp': '2025-10-01 04:41:50.221312', 'step': 17708, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:50.273674', 'step': 17708, 'epoch': 3} {'type': 'loss', 'content': 0.06409682333469391, 'timestamp': '2025-10-01 04:41:50.276344', 'step': 17709, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:50.330116', 'step': 17709, 'epoch': 3} {'type': 'loss', 'content': 0.12795549631118774, 'timestamp': '2025-10-01 04:41:50.332372', 'step': 17710, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:50.385530', 'step': 17710, 'epoch': 3} {'type': 'loss', 'content': 0.0922144427895546, 'timestamp': '2025-10-01 04:41:50.387948', 'step': 17711, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:50.450291', 'step': 17711, 'epoch': 3} {'type': 'loss', 'content': 0.2661752700805664, 'timestamp': '2025-10-01 04:41:50.463081', 'step': 17712, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:50.515632', 'step': 17712, 'epoch': 3} {'type': 'loss', 'content': 0.06232473999261856, 'timestamp': '2025-10-01 04:41:50.517719', 'step': 17713, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:50.570907', 'step': 17713, 'epoch': 3} {'type': 'loss', 'content': 0.12320281565189362, 'timestamp': '2025-10-01 04:41:50.573069', 'step': 17714, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:50.626524', 'step': 17714, 'epoch': 3} {'type': 'loss', 'content': 0.08530382066965103, 'timestamp': '2025-10-01 04:41:50.628783', 'step': 17715, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:50.682214', 'step': 17715, 'epoch': 3} {'type': 'loss', 'content': 0.09978172928094864, 'timestamp': '2025-10-01 04:41:50.687961', 'step': 17716, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:50.740615', 'step': 17716, 'epoch': 3} {'type': 'loss', 'content': 0.061801884323358536, 'timestamp': '2025-10-01 04:41:50.742788', 'step': 17717, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:41:50.796316', 'step': 17717, 'epoch': 3} {'type': 'loss', 'content': 0.1282685101032257, 'timestamp': '2025-10-01 04:41:50.798475', 'step': 17718, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:50.853632', 'step': 17718, 'epoch': 3} {'type': 'loss', 'content': 0.016803724691271782, 'timestamp': '2025-10-01 04:41:50.855759', 'step': 17719, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:50.911583', 'step': 17719, 'epoch': 3} {'type': 'loss', 'content': 0.0923699215054512, 'timestamp': '2025-10-01 04:41:50.917545', 'step': 17720, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:41:50.972219', 'step': 17720, 'epoch': 3} {'type': 'loss', 'content': 0.04978836700320244, 'timestamp': '2025-10-01 04:41:50.974477', 'step': 17721, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:51.027546', 'step': 17721, 'epoch': 3} {'type': 'loss', 'content': 0.10396192967891693, 'timestamp': '2025-10-01 04:41:51.029950', 'step': 17722, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:41:51.083811', 'step': 17722, 'epoch': 3} {'type': 'loss', 'content': 0.03917943313717842, 'timestamp': '2025-10-01 04:41:51.086508', 'step': 17723, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:51.146347', 'step': 17723, 'epoch': 3} {'type': 'loss', 'content': 0.11181499063968658, 'timestamp': '2025-10-01 04:41:51.152282', 'step': 17724, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:51.213734', 'step': 17724, 'epoch': 3} {'type': 'loss', 'content': 0.09965381026268005, 'timestamp': '2025-10-01 04:41:51.216186', 'step': 17725, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:51.269706', 'step': 17725, 'epoch': 3} {'type': 'loss', 'content': 0.08279483765363693, 'timestamp': '2025-10-01 04:41:51.271992', 'step': 17726, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:51.325022', 'step': 17726, 'epoch': 3} {'type': 'loss', 'content': 0.1880577951669693, 'timestamp': '2025-10-01 04:41:51.327230', 'step': 17727, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:51.380581', 'step': 17727, 'epoch': 3} {'type': 'loss', 'content': 0.11822289228439331, 'timestamp': '2025-10-01 04:41:51.386451', 'step': 17728, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:51.439103', 'step': 17728, 'epoch': 3} {'type': 'loss', 'content': 0.15061065554618835, 'timestamp': '2025-10-01 04:41:51.443008', 'step': 17729, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:51.497305', 'step': 17729, 'epoch': 3} {'type': 'loss', 'content': 0.12683983147144318, 'timestamp': '2025-10-01 04:41:51.499535', 'step': 17730, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:51.553461', 'step': 17730, 'epoch': 3} {'type': 'loss', 'content': 0.05773673579096794, 'timestamp': '2025-10-01 04:41:51.555982', 'step': 17731, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:51.609874', 'step': 17731, 'epoch': 3} {'type': 'loss', 'content': 0.017736919224262238, 'timestamp': '2025-10-01 04:41:51.615968', 'step': 17732, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:41:51.669387', 'step': 17732, 'epoch': 3} {'type': 'loss', 'content': 0.08728387206792831, 'timestamp': '2025-10-01 04:41:51.671587', 'step': 17733, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:51.724764', 'step': 17733, 'epoch': 3} {'type': 'loss', 'content': 0.0681348666548729, 'timestamp': '2025-10-01 04:41:51.734840', 'step': 17734, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:51.802993', 'step': 17734, 'epoch': 3} {'type': 'loss', 'content': 0.07429789006710052, 'timestamp': '2025-10-01 04:41:51.805822', 'step': 17735, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:51.859831', 'step': 17735, 'epoch': 3} {'type': 'loss', 'content': 0.10451114177703857, 'timestamp': '2025-10-01 04:41:51.865951', 'step': 17736, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:51.919081', 'step': 17736, 'epoch': 3} {'type': 'loss', 'content': 0.07857586443424225, 'timestamp': '2025-10-01 04:41:51.921305', 'step': 17737, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:51.974763', 'step': 17737, 'epoch': 3} {'type': 'loss', 'content': 0.050643190741539, 'timestamp': '2025-10-01 04:41:51.977094', 'step': 17738, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:52.031761', 'step': 17738, 'epoch': 3} {'type': 'loss', 'content': 0.1972581446170807, 'timestamp': '2025-10-01 04:41:52.034917', 'step': 17739, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:52.088712', 'step': 17739, 'epoch': 3} {'type': 'loss', 'content': 0.17433390021324158, 'timestamp': '2025-10-01 04:41:52.094970', 'step': 17740, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:52.148031', 'step': 17740, 'epoch': 3} {'type': 'loss', 'content': 0.13157495856285095, 'timestamp': '2025-10-01 04:41:52.150240', 'step': 17741, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:52.218308', 'step': 17741, 'epoch': 3} {'type': 'loss', 'content': 0.11510532349348068, 'timestamp': '2025-10-01 04:41:52.220544', 'step': 17742, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:52.274190', 'step': 17742, 'epoch': 3} {'type': 'loss', 'content': 0.05133755877614021, 'timestamp': '2025-10-01 04:41:52.276341', 'step': 17743, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:52.330715', 'step': 17743, 'epoch': 3} {'type': 'loss', 'content': 0.05255437642335892, 'timestamp': '2025-10-01 04:41:52.337047', 'step': 17744, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:52.400292', 'step': 17744, 'epoch': 3} {'type': 'loss', 'content': 0.12214549630880356, 'timestamp': '2025-10-01 04:41:52.402711', 'step': 17745, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:52.456160', 'step': 17745, 'epoch': 3} {'type': 'loss', 'content': 0.08185594528913498, 'timestamp': '2025-10-01 04:41:52.458392', 'step': 17746, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:52.511890', 'step': 17746, 'epoch': 3} {'type': 'loss', 'content': 0.06978251039981842, 'timestamp': '2025-10-01 04:41:52.514001', 'step': 17747, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:52.567078', 'step': 17747, 'epoch': 3} {'type': 'loss', 'content': 0.0394800640642643, 'timestamp': '2025-10-01 04:41:52.572783', 'step': 17748, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:52.625424', 'step': 17748, 'epoch': 3} {'type': 'loss', 'content': 0.02408657968044281, 'timestamp': '2025-10-01 04:41:52.627706', 'step': 17749, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:52.681112', 'step': 17749, 'epoch': 3} {'type': 'loss', 'content': 0.1240631639957428, 'timestamp': '2025-10-01 04:41:52.683436', 'step': 17750, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:52.743498', 'step': 17750, 'epoch': 3} {'type': 'loss', 'content': 0.10456685721874237, 'timestamp': '2025-10-01 04:41:52.748719', 'step': 17751, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:52.802012', 'step': 17751, 'epoch': 3} {'type': 'loss', 'content': 0.04764493182301521, 'timestamp': '2025-10-01 04:41:52.808039', 'step': 17752, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:52.861229', 'step': 17752, 'epoch': 3} {'type': 'loss', 'content': 0.04506539925932884, 'timestamp': '2025-10-01 04:41:52.863549', 'step': 17753, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:52.916948', 'step': 17753, 'epoch': 3} {'type': 'loss', 'content': 0.069334015250206, 'timestamp': '2025-10-01 04:41:52.919214', 'step': 17754, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:52.974424', 'step': 17754, 'epoch': 3} {'type': 'loss', 'content': 0.04842913895845413, 'timestamp': '2025-10-01 04:41:52.976662', 'step': 17755, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:53.030117', 'step': 17755, 'epoch': 3} {'type': 'loss', 'content': 0.08953897655010223, 'timestamp': '2025-10-01 04:41:53.035937', 'step': 17756, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:53.088479', 'step': 17756, 'epoch': 3} {'type': 'loss', 'content': 0.13848252594470978, 'timestamp': '2025-10-01 04:41:53.090712', 'step': 17757, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:53.144081', 'step': 17757, 'epoch': 3} {'type': 'loss', 'content': 0.11127076297998428, 'timestamp': '2025-10-01 04:41:53.146181', 'step': 17758, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:53.199555', 'step': 17758, 'epoch': 3} {'type': 'loss', 'content': 0.08486541360616684, 'timestamp': '2025-10-01 04:41:53.201690', 'step': 17759, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:53.254869', 'step': 17759, 'epoch': 3} {'type': 'loss', 'content': 0.08794116973876953, 'timestamp': '2025-10-01 04:41:53.260630', 'step': 17760, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:53.313120', 'step': 17760, 'epoch': 3} {'type': 'loss', 'content': 0.17695733904838562, 'timestamp': '2025-10-01 04:41:53.315467', 'step': 17761, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:53.368757', 'step': 17761, 'epoch': 3} {'type': 'loss', 'content': 0.07992212474346161, 'timestamp': '2025-10-01 04:41:53.370941', 'step': 17762, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:53.424946', 'step': 17762, 'epoch': 3} {'type': 'loss', 'content': 0.06308067589998245, 'timestamp': '2025-10-01 04:41:53.427256', 'step': 17763, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:53.480436', 'step': 17763, 'epoch': 3} {'type': 'loss', 'content': 0.10268346220254898, 'timestamp': '2025-10-01 04:41:53.486175', 'step': 17764, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:53.539267', 'step': 17764, 'epoch': 3} {'type': 'loss', 'content': 0.04345233365893364, 'timestamp': '2025-10-01 04:41:53.545367', 'step': 17765, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:53.599229', 'step': 17765, 'epoch': 3} {'type': 'loss', 'content': 0.10796579718589783, 'timestamp': '2025-10-01 04:41:53.601388', 'step': 17766, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:53.654167', 'step': 17766, 'epoch': 3} {'type': 'loss', 'content': 0.08340655267238617, 'timestamp': '2025-10-01 04:41:53.656506', 'step': 17767, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:53.710394', 'step': 17767, 'epoch': 3} {'type': 'loss', 'content': 0.061746418476104736, 'timestamp': '2025-10-01 04:41:53.716472', 'step': 17768, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:53.769815', 'step': 17768, 'epoch': 3} {'type': 'loss', 'content': 0.09707003831863403, 'timestamp': '2025-10-01 04:41:53.771926', 'step': 17769, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:53.825036', 'step': 17769, 'epoch': 3} {'type': 'loss', 'content': 0.07340846955776215, 'timestamp': '2025-10-01 04:41:53.827205', 'step': 17770, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:53.880821', 'step': 17770, 'epoch': 3} {'type': 'loss', 'content': 0.07104195654392242, 'timestamp': '2025-10-01 04:41:53.882999', 'step': 17771, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:53.937217', 'step': 17771, 'epoch': 3} {'type': 'loss', 'content': 0.08340616524219513, 'timestamp': '2025-10-01 04:41:53.943482', 'step': 17772, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:53.998378', 'step': 17772, 'epoch': 3} {'type': 'loss', 'content': 0.08214514702558517, 'timestamp': '2025-10-01 04:41:54.000556', 'step': 17773, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:54.055380', 'step': 17773, 'epoch': 3} {'type': 'loss', 'content': 0.06629620492458344, 'timestamp': '2025-10-01 04:41:54.057584', 'step': 17774, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:54.111806', 'step': 17774, 'epoch': 3} {'type': 'loss', 'content': 0.08670729398727417, 'timestamp': '2025-10-01 04:41:54.113969', 'step': 17775, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:54.168195', 'step': 17775, 'epoch': 3} {'type': 'loss', 'content': 0.08930391073226929, 'timestamp': '2025-10-01 04:41:54.174520', 'step': 17776, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:54.227838', 'step': 17776, 'epoch': 3} {'type': 'loss', 'content': 0.10789377242326736, 'timestamp': '2025-10-01 04:41:54.229955', 'step': 17777, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:54.283699', 'step': 17777, 'epoch': 3} {'type': 'loss', 'content': 0.07378680258989334, 'timestamp': '2025-10-01 04:41:54.285775', 'step': 17778, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:54.341006', 'step': 17778, 'epoch': 3} {'type': 'loss', 'content': 0.14826342463493347, 'timestamp': '2025-10-01 04:41:54.344989', 'step': 17779, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:54.401953', 'step': 17779, 'epoch': 3} {'type': 'loss', 'content': 0.1389094740152359, 'timestamp': '2025-10-01 04:41:54.408169', 'step': 17780, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:54.460944', 'step': 17780, 'epoch': 3} {'type': 'loss', 'content': 0.06467511504888535, 'timestamp': '2025-10-01 04:41:54.462832', 'step': 17781, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:54.515781', 'step': 17781, 'epoch': 3} {'type': 'loss', 'content': 0.11767977476119995, 'timestamp': '2025-10-01 04:41:54.517837', 'step': 17782, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:54.571841', 'step': 17782, 'epoch': 3} {'type': 'loss', 'content': 0.015889938920736313, 'timestamp': '2025-10-01 04:41:54.574178', 'step': 17783, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:54.627489', 'step': 17783, 'epoch': 3} {'type': 'loss', 'content': 0.044085871428251266, 'timestamp': '2025-10-01 04:41:54.633582', 'step': 17784, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:54.686672', 'step': 17784, 'epoch': 3} {'type': 'loss', 'content': 0.09658563882112503, 'timestamp': '2025-10-01 04:41:54.688778', 'step': 17785, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:54.741423', 'step': 17785, 'epoch': 3} {'type': 'loss', 'content': 0.06629782915115356, 'timestamp': '2025-10-01 04:41:54.743574', 'step': 17786, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:54.796465', 'step': 17786, 'epoch': 3} {'type': 'loss', 'content': 0.0752665251493454, 'timestamp': '2025-10-01 04:41:54.798668', 'step': 17787, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:54.852624', 'step': 17787, 'epoch': 3} {'type': 'loss', 'content': 0.0675952211022377, 'timestamp': '2025-10-01 04:41:54.858528', 'step': 17788, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:41:54.911616', 'step': 17788, 'epoch': 3} {'type': 'loss', 'content': 0.15385892987251282, 'timestamp': '2025-10-01 04:41:54.913849', 'step': 17789, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:54.967484', 'step': 17789, 'epoch': 3} {'type': 'loss', 'content': 0.06610582023859024, 'timestamp': '2025-10-01 04:41:54.969659', 'step': 17790, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:55.024297', 'step': 17790, 'epoch': 3} {'type': 'loss', 'content': 0.07577091455459595, 'timestamp': '2025-10-01 04:41:55.026490', 'step': 17791, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:55.080603', 'step': 17791, 'epoch': 3} {'type': 'loss', 'content': 0.04923965781927109, 'timestamp': '2025-10-01 04:41:55.086750', 'step': 17792, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:55.139371', 'step': 17792, 'epoch': 3} {'type': 'loss', 'content': 0.14564087986946106, 'timestamp': '2025-10-01 04:41:55.141930', 'step': 17793, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:55.194864', 'step': 17793, 'epoch': 3} {'type': 'loss', 'content': 0.07212136685848236, 'timestamp': '2025-10-01 04:41:55.197037', 'step': 17794, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:41:55.250602', 'step': 17794, 'epoch': 3} {'type': 'loss', 'content': 0.09441046416759491, 'timestamp': '2025-10-01 04:41:55.252506', 'step': 17795, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:55.305377', 'step': 17795, 'epoch': 3} {'type': 'loss', 'content': 0.15619602799415588, 'timestamp': '2025-10-01 04:41:55.311554', 'step': 17796, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:55.364805', 'step': 17796, 'epoch': 3} {'type': 'loss', 'content': 0.14406201243400574, 'timestamp': '2025-10-01 04:41:55.367026', 'step': 17797, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:55.421360', 'step': 17797, 'epoch': 3} {'type': 'loss', 'content': 0.10038230568170547, 'timestamp': '2025-10-01 04:41:55.423250', 'step': 17798, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:55.476942', 'step': 17798, 'epoch': 3} {'type': 'loss', 'content': 0.09570051729679108, 'timestamp': '2025-10-01 04:41:55.479169', 'step': 17799, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:55.534185', 'step': 17799, 'epoch': 3} {'type': 'loss', 'content': 0.09910660982131958, 'timestamp': '2025-10-01 04:41:55.541145', 'step': 17800, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:41:55.598786', 'step': 17800, 'epoch': 3} {'type': 'loss', 'content': 0.09867488592863083, 'timestamp': '2025-10-01 04:41:55.601019', 'step': 17801, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:55.659100', 'step': 17801, 'epoch': 3} {'type': 'loss', 'content': 0.14069826900959015, 'timestamp': '2025-10-01 04:41:55.661367', 'step': 17802, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:55.719850', 'step': 17802, 'epoch': 3} {'type': 'loss', 'content': 0.07962729781866074, 'timestamp': '2025-10-01 04:41:55.722240', 'step': 17803, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:55.780920', 'step': 17803, 'epoch': 3} {'type': 'loss', 'content': 0.16675642132759094, 'timestamp': '2025-10-01 04:41:55.787784', 'step': 17804, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:55.844711', 'step': 17804, 'epoch': 3} {'type': 'loss', 'content': 0.03224911168217659, 'timestamp': '2025-10-01 04:41:55.860660', 'step': 17805, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:55.917745', 'step': 17805, 'epoch': 3} {'type': 'loss', 'content': 0.038734883069992065, 'timestamp': '2025-10-01 04:41:55.920134', 'step': 17806, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:55.978147', 'step': 17806, 'epoch': 3} {'type': 'loss', 'content': 0.046834252774715424, 'timestamp': '2025-10-01 04:41:55.980409', 'step': 17807, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:56.039224', 'step': 17807, 'epoch': 3} {'type': 'loss', 'content': 0.0804092213511467, 'timestamp': '2025-10-01 04:41:56.049167', 'step': 17808, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:56.106985', 'step': 17808, 'epoch': 3} {'type': 'loss', 'content': 0.1082521304488182, 'timestamp': '2025-10-01 04:41:56.109299', 'step': 17809, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:56.167065', 'step': 17809, 'epoch': 3} {'type': 'loss', 'content': 0.09439466893672943, 'timestamp': '2025-10-01 04:41:56.169423', 'step': 17810, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:56.224846', 'step': 17810, 'epoch': 3} {'type': 'loss', 'content': 0.06742656230926514, 'timestamp': '2025-10-01 04:41:56.227227', 'step': 17811, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:41:56.284581', 'step': 17811, 'epoch': 3} {'type': 'loss', 'content': 0.03473781421780586, 'timestamp': '2025-10-01 04:41:56.291099', 'step': 17812, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:56.344863', 'step': 17812, 'epoch': 3} {'type': 'loss', 'content': 0.045151032507419586, 'timestamp': '2025-10-01 04:41:56.347187', 'step': 17813, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:56.399952', 'step': 17813, 'epoch': 3} {'type': 'loss', 'content': 0.10186092555522919, 'timestamp': '2025-10-01 04:41:56.404135', 'step': 17814, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:56.458851', 'step': 17814, 'epoch': 3} {'type': 'loss', 'content': 0.0904877707362175, 'timestamp': '2025-10-01 04:41:56.460885', 'step': 17815, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:41:56.514124', 'step': 17815, 'epoch': 3} {'type': 'loss', 'content': 0.14123916625976562, 'timestamp': '2025-10-01 04:41:56.520553', 'step': 17816, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:56.573355', 'step': 17816, 'epoch': 3} {'type': 'loss', 'content': 0.05044666677713394, 'timestamp': '2025-10-01 04:41:56.575582', 'step': 17817, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:56.628490', 'step': 17817, 'epoch': 3} {'type': 'loss', 'content': 0.08064549416303635, 'timestamp': '2025-10-01 04:41:56.630840', 'step': 17818, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:56.685394', 'step': 17818, 'epoch': 3} {'type': 'loss', 'content': 0.15912528336048126, 'timestamp': '2025-10-01 04:41:56.687755', 'step': 17819, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:56.742366', 'step': 17819, 'epoch': 3} {'type': 'loss', 'content': 0.07760925590991974, 'timestamp': '2025-10-01 04:41:56.748837', 'step': 17820, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:56.802735', 'step': 17820, 'epoch': 3} {'type': 'loss', 'content': 0.15744519233703613, 'timestamp': '2025-10-01 04:41:56.805113', 'step': 17821, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:56.859215', 'step': 17821, 'epoch': 3} {'type': 'loss', 'content': 0.05962038412690163, 'timestamp': '2025-10-01 04:41:56.861777', 'step': 17822, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:56.916776', 'step': 17822, 'epoch': 3} {'type': 'loss', 'content': 0.13344010710716248, 'timestamp': '2025-10-01 04:41:56.920652', 'step': 17823, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:56.975569', 'step': 17823, 'epoch': 3} {'type': 'loss', 'content': 0.021124009042978287, 'timestamp': '2025-10-01 04:41:56.981911', 'step': 17824, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:57.038303', 'step': 17824, 'epoch': 3} {'type': 'loss', 'content': 0.0709216296672821, 'timestamp': '2025-10-01 04:41:57.040880', 'step': 17825, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:57.095730', 'step': 17825, 'epoch': 3} {'type': 'loss', 'content': 0.18902772665023804, 'timestamp': '2025-10-01 04:41:57.098637', 'step': 17826, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:57.152690', 'step': 17826, 'epoch': 3} {'type': 'loss', 'content': 0.06434770673513412, 'timestamp': '2025-10-01 04:41:57.155181', 'step': 17827, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:57.209230', 'step': 17827, 'epoch': 3} {'type': 'loss', 'content': 0.10883495211601257, 'timestamp': '2025-10-01 04:41:57.216208', 'step': 17828, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:57.271662', 'step': 17828, 'epoch': 3} {'type': 'loss', 'content': 0.08428777754306793, 'timestamp': '2025-10-01 04:41:57.274178', 'step': 17829, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:57.340532', 'step': 17829, 'epoch': 3} {'type': 'loss', 'content': 0.05093950778245926, 'timestamp': '2025-10-01 04:41:57.342683', 'step': 17830, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:57.397071', 'step': 17830, 'epoch': 3} {'type': 'loss', 'content': 0.07153453677892685, 'timestamp': '2025-10-01 04:41:57.399572', 'step': 17831, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:57.454223', 'step': 17831, 'epoch': 3} {'type': 'loss', 'content': 0.1252066195011139, 'timestamp': '2025-10-01 04:41:57.460463', 'step': 17832, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:57.514061', 'step': 17832, 'epoch': 3} {'type': 'loss', 'content': 0.10629824548959732, 'timestamp': '2025-10-01 04:41:57.518832', 'step': 17833, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:57.575944', 'step': 17833, 'epoch': 3} {'type': 'loss', 'content': 0.07623941451311111, 'timestamp': '2025-10-01 04:41:57.578351', 'step': 17834, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:57.632768', 'step': 17834, 'epoch': 3} {'type': 'loss', 'content': 0.10123966634273529, 'timestamp': '2025-10-01 04:41:57.635392', 'step': 17835, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:57.689850', 'step': 17835, 'epoch': 3} {'type': 'loss', 'content': 0.11986840516328812, 'timestamp': '2025-10-01 04:41:57.695668', 'step': 17836, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:57.749197', 'step': 17836, 'epoch': 3} {'type': 'loss', 'content': 0.04997509717941284, 'timestamp': '2025-10-01 04:41:57.751676', 'step': 17837, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:57.805823', 'step': 17837, 'epoch': 3} {'type': 'loss', 'content': 0.06889498978853226, 'timestamp': '2025-10-01 04:41:57.808282', 'step': 17838, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:57.862704', 'step': 17838, 'epoch': 3} {'type': 'loss', 'content': 0.13318881392478943, 'timestamp': '2025-10-01 04:41:57.865475', 'step': 17839, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:57.919608', 'step': 17839, 'epoch': 3} {'type': 'loss', 'content': 0.1837288737297058, 'timestamp': '2025-10-01 04:41:57.925771', 'step': 17840, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:57.993856', 'step': 17840, 'epoch': 3} {'type': 'loss', 'content': 0.16025127470493317, 'timestamp': '2025-10-01 04:41:58.004115', 'step': 17841, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:58.064946', 'step': 17841, 'epoch': 3} {'type': 'loss', 'content': 0.06625007092952728, 'timestamp': '2025-10-01 04:41:58.067348', 'step': 17842, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:58.132874', 'step': 17842, 'epoch': 3} {'type': 'loss', 'content': 0.07846580445766449, 'timestamp': '2025-10-01 04:41:58.146165', 'step': 17843, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:41:58.199438', 'step': 17843, 'epoch': 3} {'type': 'loss', 'content': 0.11219040304422379, 'timestamp': '2025-10-01 04:41:58.205266', 'step': 17844, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:58.258128', 'step': 17844, 'epoch': 3} {'type': 'loss', 'content': 0.13991442322731018, 'timestamp': '2025-10-01 04:41:58.260424', 'step': 17845, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:41:58.313869', 'step': 17845, 'epoch': 3} {'type': 'loss', 'content': 0.06340906023979187, 'timestamp': '2025-10-01 04:41:58.316162', 'step': 17846, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:58.370448', 'step': 17846, 'epoch': 3} {'type': 'loss', 'content': 0.025607112795114517, 'timestamp': '2025-10-01 04:41:58.372555', 'step': 17847, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:58.427947', 'step': 17847, 'epoch': 3} {'type': 'loss', 'content': 0.08538095653057098, 'timestamp': '2025-10-01 04:41:58.434031', 'step': 17848, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:58.487323', 'step': 17848, 'epoch': 3} {'type': 'loss', 'content': 0.15010175108909607, 'timestamp': '2025-10-01 04:41:58.489501', 'step': 17849, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:58.542608', 'step': 17849, 'epoch': 3} {'type': 'loss', 'content': 0.0665699914097786, 'timestamp': '2025-10-01 04:41:58.544720', 'step': 17850, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:58.598668', 'step': 17850, 'epoch': 3} {'type': 'loss', 'content': 0.07065045833587646, 'timestamp': '2025-10-01 04:41:58.600932', 'step': 17851, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:58.654798', 'step': 17851, 'epoch': 3} {'type': 'loss', 'content': 0.086699478328228, 'timestamp': '2025-10-01 04:41:58.660580', 'step': 17852, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:58.713726', 'step': 17852, 'epoch': 3} {'type': 'loss', 'content': 0.10073598474264145, 'timestamp': '2025-10-01 04:41:58.716232', 'step': 17853, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:58.779970', 'step': 17853, 'epoch': 3} {'type': 'loss', 'content': 0.1458142250776291, 'timestamp': '2025-10-01 04:41:58.782285', 'step': 17854, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:58.836294', 'step': 17854, 'epoch': 3} {'type': 'loss', 'content': 0.06539617478847504, 'timestamp': '2025-10-01 04:41:58.838841', 'step': 17855, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:41:58.892268', 'step': 17855, 'epoch': 3} {'type': 'loss', 'content': 0.16355034708976746, 'timestamp': '2025-10-01 04:41:58.897970', 'step': 17856, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:58.951317', 'step': 17856, 'epoch': 3} {'type': 'loss', 'content': 0.16569750010967255, 'timestamp': '2025-10-01 04:41:58.953361', 'step': 17857, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:59.012650', 'step': 17857, 'epoch': 3} {'type': 'loss', 'content': 0.17305615544319153, 'timestamp': '2025-10-01 04:41:59.014703', 'step': 17858, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:59.068436', 'step': 17858, 'epoch': 3} {'type': 'loss', 'content': 0.14739833772182465, 'timestamp': '2025-10-01 04:41:59.070623', 'step': 17859, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:59.124970', 'step': 17859, 'epoch': 3} {'type': 'loss', 'content': 0.1089208796620369, 'timestamp': '2025-10-01 04:41:59.130744', 'step': 17860, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:59.183671', 'step': 17860, 'epoch': 3} {'type': 'loss', 'content': 0.09067360311746597, 'timestamp': '2025-10-01 04:41:59.188907', 'step': 17861, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:59.243408', 'step': 17861, 'epoch': 3} {'type': 'loss', 'content': 0.06393399089574814, 'timestamp': '2025-10-01 04:41:59.245660', 'step': 17862, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:59.300617', 'step': 17862, 'epoch': 3} {'type': 'loss', 'content': 0.08628931641578674, 'timestamp': '2025-10-01 04:41:59.302816', 'step': 17863, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:59.357124', 'step': 17863, 'epoch': 3} {'type': 'loss', 'content': 0.05408818647265434, 'timestamp': '2025-10-01 04:41:59.362972', 'step': 17864, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:59.415470', 'step': 17864, 'epoch': 3} {'type': 'loss', 'content': 0.08724962919950485, 'timestamp': '2025-10-01 04:41:59.417791', 'step': 17865, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:59.470622', 'step': 17865, 'epoch': 3} {'type': 'loss', 'content': 0.05992235988378525, 'timestamp': '2025-10-01 04:41:59.472839', 'step': 17866, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:59.527199', 'step': 17866, 'epoch': 3} {'type': 'loss', 'content': 0.09332635998725891, 'timestamp': '2025-10-01 04:41:59.529419', 'step': 17867, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:59.582790', 'step': 17867, 'epoch': 3} {'type': 'loss', 'content': 0.08857682347297668, 'timestamp': '2025-10-01 04:41:59.588713', 'step': 17868, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:41:59.641697', 'step': 17868, 'epoch': 3} {'type': 'loss', 'content': 0.07788429409265518, 'timestamp': '2025-10-01 04:41:59.644080', 'step': 17869, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:59.697924', 'step': 17869, 'epoch': 3} {'type': 'loss', 'content': 0.05442918837070465, 'timestamp': '2025-10-01 04:41:59.700038', 'step': 17870, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:59.754015', 'step': 17870, 'epoch': 3} {'type': 'loss', 'content': 0.10587935894727707, 'timestamp': '2025-10-01 04:41:59.756269', 'step': 17871, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:41:59.809320', 'step': 17871, 'epoch': 3} {'type': 'loss', 'content': 0.1862187534570694, 'timestamp': '2025-10-01 04:41:59.815094', 'step': 17872, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:59.867921', 'step': 17872, 'epoch': 3} {'type': 'loss', 'content': 0.07729711383581161, 'timestamp': '2025-10-01 04:41:59.869938', 'step': 17873, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:41:59.922973', 'step': 17873, 'epoch': 3} {'type': 'loss', 'content': 0.10988540202379227, 'timestamp': '2025-10-01 04:41:59.925124', 'step': 17874, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:41:59.978319', 'step': 17874, 'epoch': 3} {'type': 'loss', 'content': 0.07396771758794785, 'timestamp': '2025-10-01 04:41:59.980459', 'step': 17875, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:00.034397', 'step': 17875, 'epoch': 3} {'type': 'loss', 'content': 0.0776405856013298, 'timestamp': '2025-10-01 04:42:00.048408', 'step': 17876, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:00.101065', 'step': 17876, 'epoch': 3} {'type': 'loss', 'content': 0.10664676874876022, 'timestamp': '2025-10-01 04:42:00.103189', 'step': 17877, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:00.156666', 'step': 17877, 'epoch': 3} {'type': 'loss', 'content': 0.17760896682739258, 'timestamp': '2025-10-01 04:42:00.159320', 'step': 17878, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:00.212682', 'step': 17878, 'epoch': 3} {'type': 'loss', 'content': 0.09185793995857239, 'timestamp': '2025-10-01 04:42:00.214872', 'step': 17879, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:00.268107', 'step': 17879, 'epoch': 3} {'type': 'loss', 'content': 0.12988406419754028, 'timestamp': '2025-10-01 04:42:00.273844', 'step': 17880, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:00.326916', 'step': 17880, 'epoch': 3} {'type': 'loss', 'content': 0.12498043477535248, 'timestamp': '2025-10-01 04:42:00.329940', 'step': 17881, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:00.382886', 'step': 17881, 'epoch': 3} {'type': 'loss', 'content': 0.12620557844638824, 'timestamp': '2025-10-01 04:42:00.385219', 'step': 17882, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:00.438666', 'step': 17882, 'epoch': 3} {'type': 'loss', 'content': 0.12485843151807785, 'timestamp': '2025-10-01 04:42:00.440887', 'step': 17883, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:00.494191', 'step': 17883, 'epoch': 3} {'type': 'loss', 'content': 0.11366604268550873, 'timestamp': '2025-10-01 04:42:00.502400', 'step': 17884, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:00.555419', 'step': 17884, 'epoch': 3} {'type': 'loss', 'content': 0.09332848340272903, 'timestamp': '2025-10-01 04:42:00.558560', 'step': 17885, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:00.613446', 'step': 17885, 'epoch': 3} {'type': 'loss', 'content': 0.11803244799375534, 'timestamp': '2025-10-01 04:42:00.615574', 'step': 17886, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:00.669443', 'step': 17886, 'epoch': 3} {'type': 'loss', 'content': 0.12822401523590088, 'timestamp': '2025-10-01 04:42:00.671580', 'step': 17887, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:00.725777', 'step': 17887, 'epoch': 3} {'type': 'loss', 'content': 0.03147966042160988, 'timestamp': '2025-10-01 04:42:00.731564', 'step': 17888, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:00.786990', 'step': 17888, 'epoch': 3} {'type': 'loss', 'content': 0.09066566824913025, 'timestamp': '2025-10-01 04:42:00.789183', 'step': 17889, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:00.850779', 'step': 17889, 'epoch': 3} {'type': 'loss', 'content': 0.06853318214416504, 'timestamp': '2025-10-01 04:42:00.853294', 'step': 17890, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:00.917002', 'step': 17890, 'epoch': 3} {'type': 'loss', 'content': 0.1449163258075714, 'timestamp': '2025-10-01 04:42:00.920162', 'step': 17891, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:00.973739', 'step': 17891, 'epoch': 3} {'type': 'loss', 'content': 0.11814684420824051, 'timestamp': '2025-10-01 04:42:00.979959', 'step': 17892, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:01.032823', 'step': 17892, 'epoch': 3} {'type': 'loss', 'content': 0.12460251897573471, 'timestamp': '2025-10-01 04:42:01.035772', 'step': 17893, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:42:01.089666', 'step': 17893, 'epoch': 3} {'type': 'loss', 'content': 0.08868759870529175, 'timestamp': '2025-10-01 04:42:01.091794', 'step': 17894, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:01.148129', 'step': 17894, 'epoch': 3} {'type': 'loss', 'content': 0.08048051595687866, 'timestamp': '2025-10-01 04:42:01.150928', 'step': 17895, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:01.207235', 'step': 17895, 'epoch': 3} {'type': 'loss', 'content': 0.06282607465982437, 'timestamp': '2025-10-01 04:42:01.213144', 'step': 17896, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-01 04:42:01.266179', 'step': 17896, 'epoch': 3} {'type': 'loss', 'content': 0.12312084436416626, 'timestamp': '2025-10-01 04:42:01.268508', 'step': 17897, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:01.322852', 'step': 17897, 'epoch': 3} {'type': 'loss', 'content': 0.04597554728388786, 'timestamp': '2025-10-01 04:42:01.325191', 'step': 17898, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:01.378999', 'step': 17898, 'epoch': 3} {'type': 'loss', 'content': 0.025885192677378654, 'timestamp': '2025-10-01 04:42:01.381195', 'step': 17899, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:01.435319', 'step': 17899, 'epoch': 3} {'type': 'loss', 'content': 0.11084916442632675, 'timestamp': '2025-10-01 04:42:01.441699', 'step': 17900, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:01.507797', 'step': 17900, 'epoch': 3} {'type': 'loss', 'content': 0.07481805235147476, 'timestamp': '2025-10-01 04:42:01.509957', 'step': 17901, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:01.563902', 'step': 17901, 'epoch': 3} {'type': 'loss', 'content': 0.10110434889793396, 'timestamp': '2025-10-01 04:42:01.566373', 'step': 17902, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:01.633116', 'step': 17902, 'epoch': 3} {'type': 'loss', 'content': 0.025068264454603195, 'timestamp': '2025-10-01 04:42:01.635234', 'step': 17903, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:01.689322', 'step': 17903, 'epoch': 3} {'type': 'loss', 'content': 0.09502466022968292, 'timestamp': '2025-10-01 04:42:01.695092', 'step': 17904, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:01.747957', 'step': 17904, 'epoch': 3} {'type': 'loss', 'content': 0.07368077337741852, 'timestamp': '2025-10-01 04:42:01.750063', 'step': 17905, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:42:01.803595', 'step': 17905, 'epoch': 3} {'type': 'loss', 'content': 0.06258939951658249, 'timestamp': '2025-10-01 04:42:01.806156', 'step': 17906, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:01.859572', 'step': 17906, 'epoch': 3} {'type': 'loss', 'content': 0.058720268309116364, 'timestamp': '2025-10-01 04:42:01.861824', 'step': 17907, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:42:01.915477', 'step': 17907, 'epoch': 3} {'type': 'loss', 'content': 0.14116084575653076, 'timestamp': '2025-10-01 04:42:01.921273', 'step': 17908, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:01.973949', 'step': 17908, 'epoch': 3} {'type': 'loss', 'content': 0.15552715957164764, 'timestamp': '2025-10-01 04:42:01.976207', 'step': 17909, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:02.029297', 'step': 17909, 'epoch': 3} {'type': 'loss', 'content': 0.1111321970820427, 'timestamp': '2025-10-01 04:42:02.031431', 'step': 17910, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:02.085139', 'step': 17910, 'epoch': 3} {'type': 'loss', 'content': 0.08896566182374954, 'timestamp': '2025-10-01 04:42:02.087649', 'step': 17911, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:02.153629', 'step': 17911, 'epoch': 3} {'type': 'loss', 'content': 0.13733157515525818, 'timestamp': '2025-10-01 04:42:02.159623', 'step': 17912, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:02.213082', 'step': 17912, 'epoch': 3} {'type': 'loss', 'content': 0.12159344553947449, 'timestamp': '2025-10-01 04:42:02.215470', 'step': 17913, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:02.269352', 'step': 17913, 'epoch': 3} {'type': 'loss', 'content': 0.13319909572601318, 'timestamp': '2025-10-01 04:42:02.271607', 'step': 17914, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:02.325259', 'step': 17914, 'epoch': 3} {'type': 'loss', 'content': 0.0944744125008583, 'timestamp': '2025-10-01 04:42:02.327442', 'step': 17915, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:02.389161', 'step': 17915, 'epoch': 3} {'type': 'loss', 'content': 0.07977496087551117, 'timestamp': '2025-10-01 04:42:02.395134', 'step': 17916, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:02.448277', 'step': 17916, 'epoch': 3} {'type': 'loss', 'content': 0.06440439075231552, 'timestamp': '2025-10-01 04:42:02.450434', 'step': 17917, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:02.504342', 'step': 17917, 'epoch': 3} {'type': 'loss', 'content': 0.1081194207072258, 'timestamp': '2025-10-01 04:42:02.506432', 'step': 17918, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:42:02.559745', 'step': 17918, 'epoch': 3} {'type': 'loss', 'content': 0.07557567954063416, 'timestamp': '2025-10-01 04:42:02.561944', 'step': 17919, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:02.615099', 'step': 17919, 'epoch': 3} {'type': 'loss', 'content': 0.07802056521177292, 'timestamp': '2025-10-01 04:42:02.620728', 'step': 17920, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:02.673846', 'step': 17920, 'epoch': 3} {'type': 'loss', 'content': 0.04771275445818901, 'timestamp': '2025-10-01 04:42:02.676008', 'step': 17921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:02.732485', 'step': 17921, 'epoch': 3} {'type': 'loss', 'content': 0.11440475285053253, 'timestamp': '2025-10-01 04:42:02.734593', 'step': 17922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:02.788027', 'step': 17922, 'epoch': 3} {'type': 'loss', 'content': 0.06095312163233757, 'timestamp': '2025-10-01 04:42:02.790143', 'step': 17923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:02.843153', 'step': 17923, 'epoch': 3} {'type': 'loss', 'content': 0.05578985810279846, 'timestamp': '2025-10-01 04:42:02.848890', 'step': 17924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:02.901581', 'step': 17924, 'epoch': 3} {'type': 'loss', 'content': 0.01994730345904827, 'timestamp': '2025-10-01 04:42:02.904718', 'step': 17925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:02.977994', 'step': 17925, 'epoch': 3} {'type': 'loss', 'content': 0.12772618234157562, 'timestamp': '2025-10-01 04:42:02.980022', 'step': 17926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:03.033790', 'step': 17926, 'epoch': 3} {'type': 'loss', 'content': 0.12554045021533966, 'timestamp': '2025-10-01 04:42:03.035893', 'step': 17927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:03.089307', 'step': 17927, 'epoch': 3} {'type': 'loss', 'content': 0.03422844409942627, 'timestamp': '2025-10-01 04:42:03.095032', 'step': 17928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:03.147741', 'step': 17928, 'epoch': 3} {'type': 'loss', 'content': 0.09759360551834106, 'timestamp': '2025-10-01 04:42:03.151582', 'step': 17929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:42:03.206097', 'step': 17929, 'epoch': 3} {'type': 'loss', 'content': 0.16179555654525757, 'timestamp': '2025-10-01 04:42:03.217901', 'step': 17930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:03.271236', 'step': 17930, 'epoch': 3} {'type': 'loss', 'content': 0.04477746784687042, 'timestamp': '2025-10-01 04:42:03.273406', 'step': 17931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:03.326840', 'step': 17931, 'epoch': 3} {'type': 'loss', 'content': 0.022851677611470222, 'timestamp': '2025-10-01 04:42:03.332674', 'step': 17932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:03.385557', 'step': 17932, 'epoch': 3} {'type': 'loss', 'content': 0.09516634792089462, 'timestamp': '2025-10-01 04:42:03.388125', 'step': 17933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:03.441451', 'step': 17933, 'epoch': 3} {'type': 'loss', 'content': 0.03977503627538681, 'timestamp': '2025-10-01 04:42:03.443570', 'step': 17934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:03.496794', 'step': 17934, 'epoch': 3} {'type': 'loss', 'content': 0.03999503701925278, 'timestamp': '2025-10-01 04:42:03.499008', 'step': 17935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:03.552671', 'step': 17935, 'epoch': 3} {'type': 'loss', 'content': 0.08376690000295639, 'timestamp': '2025-10-01 04:42:03.558574', 'step': 17936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:03.611307', 'step': 17936, 'epoch': 3} {'type': 'loss', 'content': 0.09012436121702194, 'timestamp': '2025-10-01 04:42:03.613417', 'step': 17937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:03.682184', 'step': 17937, 'epoch': 3} {'type': 'loss', 'content': 0.11900255084037781, 'timestamp': '2025-10-01 04:42:03.684350', 'step': 17938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:03.737845', 'step': 17938, 'epoch': 3} {'type': 'loss', 'content': 0.036448318511247635, 'timestamp': '2025-10-01 04:42:03.740324', 'step': 17939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:03.794053', 'step': 17939, 'epoch': 3} {'type': 'loss', 'content': 0.07527367025613785, 'timestamp': '2025-10-01 04:42:03.800125', 'step': 17940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:03.853342', 'step': 17940, 'epoch': 3} {'type': 'loss', 'content': 0.09834007918834686, 'timestamp': '2025-10-01 04:42:03.856194', 'step': 17941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:03.909270', 'step': 17941, 'epoch': 3} {'type': 'loss', 'content': 0.12216443568468094, 'timestamp': '2025-10-01 04:42:03.911773', 'step': 17942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:03.965589', 'step': 17942, 'epoch': 3} {'type': 'loss', 'content': 0.1028396487236023, 'timestamp': '2025-10-01 04:42:03.967694', 'step': 17943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:04.021473', 'step': 17943, 'epoch': 3} {'type': 'loss', 'content': 0.05635792762041092, 'timestamp': '2025-10-01 04:42:04.027151', 'step': 17944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:04.080489', 'step': 17944, 'epoch': 3} {'type': 'loss', 'content': 0.0778195708990097, 'timestamp': '2025-10-01 04:42:04.082614', 'step': 17945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:04.136346', 'step': 17945, 'epoch': 3} {'type': 'loss', 'content': 0.036756981164216995, 'timestamp': '2025-10-01 04:42:04.138377', 'step': 17946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:04.191889', 'step': 17946, 'epoch': 3} {'type': 'loss', 'content': 0.10003574192523956, 'timestamp': '2025-10-01 04:42:04.194040', 'step': 17947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:04.247140', 'step': 17947, 'epoch': 3} {'type': 'loss', 'content': 0.10725077986717224, 'timestamp': '2025-10-01 04:42:04.253885', 'step': 17948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:04.308788', 'step': 17948, 'epoch': 3} {'type': 'loss', 'content': 0.0408664233982563, 'timestamp': '2025-10-01 04:42:04.311053', 'step': 17949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:04.364450', 'step': 17949, 'epoch': 3} {'type': 'loss', 'content': 0.11018873006105423, 'timestamp': '2025-10-01 04:42:04.366590', 'step': 17950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:04.419339', 'step': 17950, 'epoch': 3} {'type': 'loss', 'content': 0.10232049226760864, 'timestamp': '2025-10-01 04:42:04.421618', 'step': 17951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:04.474904', 'step': 17951, 'epoch': 3} {'type': 'loss', 'content': 0.10543295741081238, 'timestamp': '2025-10-01 04:42:04.482213', 'step': 17952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:04.534696', 'step': 17952, 'epoch': 3} {'type': 'loss', 'content': 0.07627765834331512, 'timestamp': '2025-10-01 04:42:04.536846', 'step': 17953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:04.590398', 'step': 17953, 'epoch': 3} {'type': 'loss', 'content': 0.08321816474199295, 'timestamp': '2025-10-01 04:42:04.592797', 'step': 17954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:04.646325', 'step': 17954, 'epoch': 3} {'type': 'loss', 'content': 0.09155748039484024, 'timestamp': '2025-10-01 04:42:04.648722', 'step': 17955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:04.702361', 'step': 17955, 'epoch': 3} {'type': 'loss', 'content': 0.15291041135787964, 'timestamp': '2025-10-01 04:42:04.708179', 'step': 17956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:04.760986', 'step': 17956, 'epoch': 3} {'type': 'loss', 'content': 0.07040068507194519, 'timestamp': '2025-10-01 04:42:04.763183', 'step': 17957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:04.816241', 'step': 17957, 'epoch': 3} {'type': 'loss', 'content': 0.1254727691411972, 'timestamp': '2025-10-01 04:42:04.818407', 'step': 17958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:04.872176', 'step': 17958, 'epoch': 3} {'type': 'loss', 'content': 0.024108927696943283, 'timestamp': '2025-10-01 04:42:04.874345', 'step': 17959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:42:04.927722', 'step': 17959, 'epoch': 3} {'type': 'loss', 'content': 0.11307559907436371, 'timestamp': '2025-10-01 04:42:04.933451', 'step': 17960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:04.986011', 'step': 17960, 'epoch': 3} {'type': 'loss', 'content': 0.059290893375873566, 'timestamp': '2025-10-01 04:42:04.988144', 'step': 17961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:05.041105', 'step': 17961, 'epoch': 3} {'type': 'loss', 'content': 0.05885211378335953, 'timestamp': '2025-10-01 04:42:05.043131', 'step': 17962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:05.096396', 'step': 17962, 'epoch': 3} {'type': 'loss', 'content': 0.11259010434150696, 'timestamp': '2025-10-01 04:42:05.098564', 'step': 17963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:05.152028', 'step': 17963, 'epoch': 3} {'type': 'loss', 'content': 0.10610748827457428, 'timestamp': '2025-10-01 04:42:05.157701', 'step': 17964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:05.210190', 'step': 17964, 'epoch': 3} {'type': 'loss', 'content': 0.19119800627231598, 'timestamp': '2025-10-01 04:42:05.212522', 'step': 17965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:05.276118', 'step': 17965, 'epoch': 3} {'type': 'loss', 'content': 0.04737825319170952, 'timestamp': '2025-10-01 04:42:05.278255', 'step': 17966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:05.332129', 'step': 17966, 'epoch': 3} {'type': 'loss', 'content': 0.147882878780365, 'timestamp': '2025-10-01 04:42:05.334263', 'step': 17967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:42:05.387411', 'step': 17967, 'epoch': 3} {'type': 'loss', 'content': 0.05995585024356842, 'timestamp': '2025-10-01 04:42:05.393532', 'step': 17968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:05.446345', 'step': 17968, 'epoch': 3} {'type': 'loss', 'content': 0.07328850775957108, 'timestamp': '2025-10-01 04:42:05.448891', 'step': 17969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:05.502367', 'step': 17969, 'epoch': 3} {'type': 'loss', 'content': 0.054060548543930054, 'timestamp': '2025-10-01 04:42:05.505248', 'step': 17970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:05.558431', 'step': 17970, 'epoch': 3} {'type': 'loss', 'content': 0.1403709352016449, 'timestamp': '2025-10-01 04:42:05.560742', 'step': 17971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:05.619898', 'step': 17971, 'epoch': 3} {'type': 'loss', 'content': 0.10791689157485962, 'timestamp': '2025-10-01 04:42:05.625681', 'step': 17972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:05.679522', 'step': 17972, 'epoch': 3} {'type': 'loss', 'content': 0.11864068359136581, 'timestamp': '2025-10-01 04:42:05.681645', 'step': 17973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:05.736375', 'step': 17973, 'epoch': 3} {'type': 'loss', 'content': 0.09721691161394119, 'timestamp': '2025-10-01 04:42:05.739680', 'step': 17974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:05.797137', 'step': 17974, 'epoch': 3} {'type': 'loss', 'content': 0.017456084489822388, 'timestamp': '2025-10-01 04:42:05.799755', 'step': 17975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:05.867482', 'step': 17975, 'epoch': 3} {'type': 'loss', 'content': 0.1543470025062561, 'timestamp': '2025-10-01 04:42:05.873650', 'step': 17976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:05.945642', 'step': 17976, 'epoch': 3} {'type': 'loss', 'content': 0.05123439431190491, 'timestamp': '2025-10-01 04:42:05.947921', 'step': 17977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:06.001789', 'step': 17977, 'epoch': 3} {'type': 'loss', 'content': 0.16024254262447357, 'timestamp': '2025-10-01 04:42:06.004021', 'step': 17978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:06.058252', 'step': 17978, 'epoch': 3} {'type': 'loss', 'content': 0.09790478646755219, 'timestamp': '2025-10-01 04:42:06.060726', 'step': 17979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:06.115095', 'step': 17979, 'epoch': 3} {'type': 'loss', 'content': 0.1258666068315506, 'timestamp': '2025-10-01 04:42:06.121259', 'step': 17980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:06.174885', 'step': 17980, 'epoch': 3} {'type': 'loss', 'content': 0.014761751517653465, 'timestamp': '2025-10-01 04:42:06.177344', 'step': 17981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:06.242943', 'step': 17981, 'epoch': 3} {'type': 'loss', 'content': 0.0789749026298523, 'timestamp': '2025-10-01 04:42:06.245302', 'step': 17982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:06.313448', 'step': 17982, 'epoch': 3} {'type': 'loss', 'content': 0.12440143525600433, 'timestamp': '2025-10-01 04:42:06.316344', 'step': 17983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:06.371004', 'step': 17983, 'epoch': 3} {'type': 'loss', 'content': 0.12138295918703079, 'timestamp': '2025-10-01 04:42:06.387348', 'step': 17984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:06.442141', 'step': 17984, 'epoch': 3} {'type': 'loss', 'content': 0.055729299783706665, 'timestamp': '2025-10-01 04:42:06.444614', 'step': 17985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:06.499686', 'step': 17985, 'epoch': 3} {'type': 'loss', 'content': 0.05462728813290596, 'timestamp': '2025-10-01 04:42:06.502086', 'step': 17986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:06.557942', 'step': 17986, 'epoch': 3} {'type': 'loss', 'content': 0.0867290273308754, 'timestamp': '2025-10-01 04:42:06.560766', 'step': 17987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:06.615169', 'step': 17987, 'epoch': 3} {'type': 'loss', 'content': 0.06598909199237823, 'timestamp': '2025-10-01 04:42:06.621676', 'step': 17988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:06.675726', 'step': 17988, 'epoch': 3} {'type': 'loss', 'content': 0.16643238067626953, 'timestamp': '2025-10-01 04:42:06.678120', 'step': 17989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:06.732166', 'step': 17989, 'epoch': 3} {'type': 'loss', 'content': 0.10228012502193451, 'timestamp': '2025-10-01 04:42:06.734523', 'step': 17990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:06.788853', 'step': 17990, 'epoch': 3} {'type': 'loss', 'content': 0.22722022235393524, 'timestamp': '2025-10-01 04:42:06.791100', 'step': 17991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:06.845350', 'step': 17991, 'epoch': 3} {'type': 'loss', 'content': 0.06600139290094376, 'timestamp': '2025-10-01 04:42:06.852188', 'step': 17992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:06.908416', 'step': 17992, 'epoch': 3} {'type': 'loss', 'content': 0.12878566980361938, 'timestamp': '2025-10-01 04:42:06.910925', 'step': 17993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:06.965852', 'step': 17993, 'epoch': 3} {'type': 'loss', 'content': 0.06234842911362648, 'timestamp': '2025-10-01 04:42:06.968657', 'step': 17994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:07.023769', 'step': 17994, 'epoch': 3} {'type': 'loss', 'content': 0.10465297847986221, 'timestamp': '2025-10-01 04:42:07.026151', 'step': 17995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:07.080503', 'step': 17995, 'epoch': 3} {'type': 'loss', 'content': 0.04927420616149902, 'timestamp': '2025-10-01 04:42:07.086738', 'step': 17996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:07.140285', 'step': 17996, 'epoch': 3} {'type': 'loss', 'content': 0.12922734022140503, 'timestamp': '2025-10-01 04:42:07.143105', 'step': 17997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:07.197716', 'step': 17997, 'epoch': 3} {'type': 'loss', 'content': 0.09721140563488007, 'timestamp': '2025-10-01 04:42:07.199886', 'step': 17998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:07.254855', 'step': 17998, 'epoch': 3} {'type': 'loss', 'content': 0.08549810945987701, 'timestamp': '2025-10-01 04:42:07.257012', 'step': 17999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:07.310196', 'step': 17999, 'epoch': 3} {'type': 'loss', 'content': 0.15514762699604034, 'timestamp': '2025-10-01 04:42:07.315948', 'step': 18000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 18000', 'timestamp': '2025-10-01 04:42:07.689193', 'step': 18000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:07.745889', 'step': 18000, 'epoch': 3} {'type': 'loss', 'content': 0.0619228333234787, 'timestamp': '2025-10-01 04:42:07.748096', 'step': 18001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:07.801914', 'step': 18001, 'epoch': 3} {'type': 'loss', 'content': 0.10171990096569061, 'timestamp': '2025-10-01 04:42:07.803930', 'step': 18002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:07.857478', 'step': 18002, 'epoch': 3} {'type': 'loss', 'content': 0.1306503862142563, 'timestamp': '2025-10-01 04:42:07.859664', 'step': 18003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:07.912996', 'step': 18003, 'epoch': 3} {'type': 'loss', 'content': 0.08526216447353363, 'timestamp': '2025-10-01 04:42:07.918938', 'step': 18004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:42:07.971926', 'step': 18004, 'epoch': 3} {'type': 'loss', 'content': 0.02877821959555149, 'timestamp': '2025-10-01 04:42:07.974227', 'step': 18005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:08.028822', 'step': 18005, 'epoch': 3} {'type': 'loss', 'content': 0.06463327258825302, 'timestamp': '2025-10-01 04:42:08.031012', 'step': 18006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:08.084540', 'step': 18006, 'epoch': 3} {'type': 'loss', 'content': 0.0447215735912323, 'timestamp': '2025-10-01 04:42:08.086676', 'step': 18007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:08.140210', 'step': 18007, 'epoch': 3} {'type': 'loss', 'content': 0.11822530627250671, 'timestamp': '2025-10-01 04:42:08.146051', 'step': 18008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:08.198854', 'step': 18008, 'epoch': 3} {'type': 'loss', 'content': 0.1429007351398468, 'timestamp': '2025-10-01 04:42:08.201631', 'step': 18009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:08.254545', 'step': 18009, 'epoch': 3} {'type': 'loss', 'content': 0.04457365721464157, 'timestamp': '2025-10-01 04:42:08.256737', 'step': 18010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:08.310362', 'step': 18010, 'epoch': 3} {'type': 'loss', 'content': 0.1366238296031952, 'timestamp': '2025-10-01 04:42:08.312576', 'step': 18011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:08.365799', 'step': 18011, 'epoch': 3} {'type': 'loss', 'content': 0.14107610285282135, 'timestamp': '2025-10-01 04:42:08.371733', 'step': 18012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:08.424802', 'step': 18012, 'epoch': 3} {'type': 'loss', 'content': 0.11935243755578995, 'timestamp': '2025-10-01 04:42:08.426856', 'step': 18013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:08.479890', 'step': 18013, 'epoch': 3} {'type': 'loss', 'content': 0.11833050847053528, 'timestamp': '2025-10-01 04:42:08.482890', 'step': 18014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:08.539493', 'step': 18014, 'epoch': 3} {'type': 'loss', 'content': 0.08901295065879822, 'timestamp': '2025-10-01 04:42:08.541609', 'step': 18015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:08.596591', 'step': 18015, 'epoch': 3} {'type': 'loss', 'content': 0.0874166339635849, 'timestamp': '2025-10-01 04:42:08.602484', 'step': 18016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:08.654930', 'step': 18016, 'epoch': 3} {'type': 'loss', 'content': 0.05220261216163635, 'timestamp': '2025-10-01 04:42:08.657084', 'step': 18017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:08.710813', 'step': 18017, 'epoch': 3} {'type': 'loss', 'content': 0.07946663349866867, 'timestamp': '2025-10-01 04:42:08.714495', 'step': 18018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:08.768122', 'step': 18018, 'epoch': 3} {'type': 'loss', 'content': 0.08265308290719986, 'timestamp': '2025-10-01 04:42:08.770709', 'step': 18019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:08.824765', 'step': 18019, 'epoch': 3} {'type': 'loss', 'content': 0.08761341869831085, 'timestamp': '2025-10-01 04:42:08.830491', 'step': 18020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:08.883595', 'step': 18020, 'epoch': 3} {'type': 'loss', 'content': 0.13357456028461456, 'timestamp': '2025-10-01 04:42:08.885760', 'step': 18021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:08.940921', 'step': 18021, 'epoch': 3} {'type': 'loss', 'content': 0.06643357127904892, 'timestamp': '2025-10-01 04:42:08.953613', 'step': 18022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:09.008068', 'step': 18022, 'epoch': 3} {'type': 'loss', 'content': 0.13977159559726715, 'timestamp': '2025-10-01 04:42:09.022132', 'step': 18023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:09.076079', 'step': 18023, 'epoch': 3} {'type': 'loss', 'content': 0.057807423174381256, 'timestamp': '2025-10-01 04:42:09.082076', 'step': 18024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:09.136538', 'step': 18024, 'epoch': 3} {'type': 'loss', 'content': 0.10889916867017746, 'timestamp': '2025-10-01 04:42:09.138861', 'step': 18025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:09.193359', 'step': 18025, 'epoch': 3} {'type': 'loss', 'content': 0.09058012068271637, 'timestamp': '2025-10-01 04:42:09.195894', 'step': 18026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:09.265172', 'step': 18026, 'epoch': 3} {'type': 'loss', 'content': 0.06688400357961655, 'timestamp': '2025-10-01 04:42:09.267385', 'step': 18027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:09.332300', 'step': 18027, 'epoch': 3} {'type': 'loss', 'content': 0.06921838968992233, 'timestamp': '2025-10-01 04:42:09.339207', 'step': 18028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:09.395215', 'step': 18028, 'epoch': 3} {'type': 'loss', 'content': 0.04165792465209961, 'timestamp': '2025-10-01 04:42:09.397750', 'step': 18029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:09.453265', 'step': 18029, 'epoch': 3} {'type': 'loss', 'content': 0.05009790509939194, 'timestamp': '2025-10-01 04:42:09.455647', 'step': 18030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:09.511159', 'step': 18030, 'epoch': 3} {'type': 'loss', 'content': 0.07272179424762726, 'timestamp': '2025-10-01 04:42:09.514522', 'step': 18031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:09.569407', 'step': 18031, 'epoch': 3} {'type': 'loss', 'content': 0.13184460997581482, 'timestamp': '2025-10-01 04:42:09.576280', 'step': 18032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:09.630502', 'step': 18032, 'epoch': 3} {'type': 'loss', 'content': 0.07597662508487701, 'timestamp': '2025-10-01 04:42:09.632236', 'step': 18033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:09.685825', 'step': 18033, 'epoch': 3} {'type': 'loss', 'content': 0.10129042714834213, 'timestamp': '2025-10-01 04:42:09.688372', 'step': 18034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:09.745010', 'step': 18034, 'epoch': 3} {'type': 'loss', 'content': 0.08744817227125168, 'timestamp': '2025-10-01 04:42:09.748126', 'step': 18035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:42:09.808488', 'step': 18035, 'epoch': 3} {'type': 'loss', 'content': 0.1626051664352417, 'timestamp': '2025-10-01 04:42:09.814858', 'step': 18036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:09.879860', 'step': 18036, 'epoch': 3} {'type': 'loss', 'content': 0.12910936772823334, 'timestamp': '2025-10-01 04:42:09.883021', 'step': 18037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:09.938256', 'step': 18037, 'epoch': 3} {'type': 'loss', 'content': 0.062217406928539276, 'timestamp': '2025-10-01 04:42:09.941109', 'step': 18038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:09.995468', 'step': 18038, 'epoch': 3} {'type': 'loss', 'content': 0.10298064351081848, 'timestamp': '2025-10-01 04:42:09.998522', 'step': 18039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:10.058119', 'step': 18039, 'epoch': 3} {'type': 'loss', 'content': 0.004733675625175238, 'timestamp': '2025-10-01 04:42:10.064350', 'step': 18040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:10.118026', 'step': 18040, 'epoch': 3} {'type': 'loss', 'content': 0.0740465596318245, 'timestamp': '2025-10-01 04:42:10.120244', 'step': 18041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:10.186730', 'step': 18041, 'epoch': 3} {'type': 'loss', 'content': 0.08615894615650177, 'timestamp': '2025-10-01 04:42:10.189279', 'step': 18042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:10.243460', 'step': 18042, 'epoch': 3} {'type': 'loss', 'content': 0.07305280864238739, 'timestamp': '2025-10-01 04:42:10.246192', 'step': 18043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:10.302382', 'step': 18043, 'epoch': 3} {'type': 'loss', 'content': 0.06273775547742844, 'timestamp': '2025-10-01 04:42:10.308646', 'step': 18044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:10.371557', 'step': 18044, 'epoch': 3} {'type': 'loss', 'content': 0.07986976951360703, 'timestamp': '2025-10-01 04:42:10.373785', 'step': 18045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:10.427989', 'step': 18045, 'epoch': 3} {'type': 'loss', 'content': 0.09796962887048721, 'timestamp': '2025-10-01 04:42:10.430245', 'step': 18046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:42:10.484372', 'step': 18046, 'epoch': 3} {'type': 'loss', 'content': 0.030712313950061798, 'timestamp': '2025-10-01 04:42:10.486932', 'step': 18047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:42:10.542048', 'step': 18047, 'epoch': 3} {'type': 'loss', 'content': 0.049014344811439514, 'timestamp': '2025-10-01 04:42:10.548836', 'step': 18048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:10.605665', 'step': 18048, 'epoch': 3} {'type': 'loss', 'content': 0.07004332542419434, 'timestamp': '2025-10-01 04:42:10.607899', 'step': 18049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:10.663713', 'step': 18049, 'epoch': 3} {'type': 'loss', 'content': 0.0934821367263794, 'timestamp': '2025-10-01 04:42:10.666131', 'step': 18050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:10.723766', 'step': 18050, 'epoch': 3} {'type': 'loss', 'content': 0.10873954743146896, 'timestamp': '2025-10-01 04:42:10.726037', 'step': 18051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:10.780902', 'step': 18051, 'epoch': 3} {'type': 'loss', 'content': 0.11263247579336166, 'timestamp': '2025-10-01 04:42:10.787107', 'step': 18052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:10.840453', 'step': 18052, 'epoch': 3} {'type': 'loss', 'content': 0.13648968935012817, 'timestamp': '2025-10-01 04:42:10.842703', 'step': 18053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:10.897360', 'step': 18053, 'epoch': 3} {'type': 'loss', 'content': 0.09046542644500732, 'timestamp': '2025-10-01 04:42:10.899598', 'step': 18054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:10.954492', 'step': 18054, 'epoch': 3} {'type': 'loss', 'content': 0.10908040404319763, 'timestamp': '2025-10-01 04:42:10.957342', 'step': 18055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:11.017053', 'step': 18055, 'epoch': 3} {'type': 'loss', 'content': 0.10659030824899673, 'timestamp': '2025-10-01 04:42:11.023914', 'step': 18056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:11.080264', 'step': 18056, 'epoch': 3} {'type': 'loss', 'content': 0.07277297973632812, 'timestamp': '2025-10-01 04:42:11.082495', 'step': 18057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:11.137478', 'step': 18057, 'epoch': 3} {'type': 'loss', 'content': 0.07468269020318985, 'timestamp': '2025-10-01 04:42:11.139783', 'step': 18058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:11.194327', 'step': 18058, 'epoch': 3} {'type': 'loss', 'content': 0.0668342337012291, 'timestamp': '2025-10-01 04:42:11.196572', 'step': 18059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:11.254826', 'step': 18059, 'epoch': 3} {'type': 'loss', 'content': 0.13009609282016754, 'timestamp': '2025-10-01 04:42:11.261285', 'step': 18060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:42:11.315715', 'step': 18060, 'epoch': 3} {'type': 'loss', 'content': 0.1457025110721588, 'timestamp': '2025-10-01 04:42:11.318078', 'step': 18061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:11.376651', 'step': 18061, 'epoch': 3} {'type': 'loss', 'content': 0.04093726724386215, 'timestamp': '2025-10-01 04:42:11.378879', 'step': 18062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:11.444085', 'step': 18062, 'epoch': 3} {'type': 'loss', 'content': 0.1432313770055771, 'timestamp': '2025-10-01 04:42:11.446299', 'step': 18063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:11.503995', 'step': 18063, 'epoch': 3} {'type': 'loss', 'content': 0.04496361315250397, 'timestamp': '2025-10-01 04:42:11.512360', 'step': 18064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:11.565421', 'step': 18064, 'epoch': 3} {'type': 'loss', 'content': 0.12855973839759827, 'timestamp': '2025-10-01 04:42:11.567721', 'step': 18065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:11.621246', 'step': 18065, 'epoch': 3} {'type': 'loss', 'content': 0.13727141916751862, 'timestamp': '2025-10-01 04:42:11.623345', 'step': 18066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:11.676856', 'step': 18066, 'epoch': 3} {'type': 'loss', 'content': 0.09643027186393738, 'timestamp': '2025-10-01 04:42:11.679410', 'step': 18067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:11.733736', 'step': 18067, 'epoch': 3} {'type': 'loss', 'content': 0.1336575150489807, 'timestamp': '2025-10-01 04:42:11.740366', 'step': 18068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:11.793233', 'step': 18068, 'epoch': 3} {'type': 'loss', 'content': 0.08713600784540176, 'timestamp': '2025-10-01 04:42:11.796222', 'step': 18069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:11.849810', 'step': 18069, 'epoch': 3} {'type': 'loss', 'content': 0.10782250761985779, 'timestamp': '2025-10-01 04:42:11.852176', 'step': 18070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:11.921468', 'step': 18070, 'epoch': 3} {'type': 'loss', 'content': 0.1359684020280838, 'timestamp': '2025-10-01 04:42:11.926656', 'step': 18071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:11.980729', 'step': 18071, 'epoch': 3} {'type': 'loss', 'content': 0.05479327216744423, 'timestamp': '2025-10-01 04:42:11.986788', 'step': 18072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:12.039630', 'step': 18072, 'epoch': 3} {'type': 'loss', 'content': 0.10096628218889236, 'timestamp': '2025-10-01 04:42:12.041804', 'step': 18073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:12.095879', 'step': 18073, 'epoch': 3} {'type': 'loss', 'content': 0.103662870824337, 'timestamp': '2025-10-01 04:42:12.098969', 'step': 18074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:12.152733', 'step': 18074, 'epoch': 3} {'type': 'loss', 'content': 0.10534685105085373, 'timestamp': '2025-10-01 04:42:12.166322', 'step': 18075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:12.219794', 'step': 18075, 'epoch': 3} {'type': 'loss', 'content': 0.09726601094007492, 'timestamp': '2025-10-01 04:42:12.225802', 'step': 18076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:12.278983', 'step': 18076, 'epoch': 3} {'type': 'loss', 'content': 0.09273716062307358, 'timestamp': '2025-10-01 04:42:12.281010', 'step': 18077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:12.334459', 'step': 18077, 'epoch': 3} {'type': 'loss', 'content': 0.10610098391771317, 'timestamp': '2025-10-01 04:42:12.336595', 'step': 18078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:12.390248', 'step': 18078, 'epoch': 3} {'type': 'loss', 'content': 0.07458188384771347, 'timestamp': '2025-10-01 04:42:12.393261', 'step': 18079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:12.447115', 'step': 18079, 'epoch': 3} {'type': 'loss', 'content': 0.10222013294696808, 'timestamp': '2025-10-01 04:42:12.452789', 'step': 18080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:42:12.505802', 'step': 18080, 'epoch': 3} {'type': 'loss', 'content': 0.12271735072135925, 'timestamp': '2025-10-01 04:42:12.508555', 'step': 18081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:12.561846', 'step': 18081, 'epoch': 3} {'type': 'loss', 'content': 0.100460946559906, 'timestamp': '2025-10-01 04:42:12.564164', 'step': 18082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:12.618896', 'step': 18082, 'epoch': 3} {'type': 'loss', 'content': 0.16752231121063232, 'timestamp': '2025-10-01 04:42:12.621353', 'step': 18083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:12.674874', 'step': 18083, 'epoch': 3} {'type': 'loss', 'content': 0.11333008855581284, 'timestamp': '2025-10-01 04:42:12.680945', 'step': 18084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:12.739499', 'step': 18084, 'epoch': 3} {'type': 'loss', 'content': 0.09166962653398514, 'timestamp': '2025-10-01 04:42:12.741752', 'step': 18085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:12.797125', 'step': 18085, 'epoch': 3} {'type': 'loss', 'content': 0.11198415607213974, 'timestamp': '2025-10-01 04:42:12.799548', 'step': 18086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:12.853336', 'step': 18086, 'epoch': 3} {'type': 'loss', 'content': 0.16892290115356445, 'timestamp': '2025-10-01 04:42:12.855491', 'step': 18087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:12.909050', 'step': 18087, 'epoch': 3} {'type': 'loss', 'content': 0.13367171585559845, 'timestamp': '2025-10-01 04:42:12.914580', 'step': 18088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:12.968593', 'step': 18088, 'epoch': 3} {'type': 'loss', 'content': 0.06463950872421265, 'timestamp': '2025-10-01 04:42:12.971563', 'step': 18089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:13.025112', 'step': 18089, 'epoch': 3} {'type': 'loss', 'content': 0.08422525227069855, 'timestamp': '2025-10-01 04:42:13.027323', 'step': 18090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:13.080700', 'step': 18090, 'epoch': 3} {'type': 'loss', 'content': 0.07033854722976685, 'timestamp': '2025-10-01 04:42:13.085055', 'step': 18091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:13.141409', 'step': 18091, 'epoch': 3} {'type': 'loss', 'content': 0.1305086761713028, 'timestamp': '2025-10-01 04:42:13.147244', 'step': 18092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:13.204292', 'step': 18092, 'epoch': 3} {'type': 'loss', 'content': 0.1391306221485138, 'timestamp': '2025-10-01 04:42:13.206565', 'step': 18093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:13.262091', 'step': 18093, 'epoch': 3} {'type': 'loss', 'content': 0.1247052252292633, 'timestamp': '2025-10-01 04:42:13.264250', 'step': 18094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:42:13.333873', 'step': 18094, 'epoch': 3} {'type': 'loss', 'content': 0.126578226685524, 'timestamp': '2025-10-01 04:42:13.347859', 'step': 18095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:13.410647', 'step': 18095, 'epoch': 3} {'type': 'loss', 'content': 0.08751696348190308, 'timestamp': '2025-10-01 04:42:13.417182', 'step': 18096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:13.472984', 'step': 18096, 'epoch': 3} {'type': 'loss', 'content': 0.10675661265850067, 'timestamp': '2025-10-01 04:42:13.486260', 'step': 18097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:13.543971', 'step': 18097, 'epoch': 3} {'type': 'loss', 'content': 0.1255638599395752, 'timestamp': '2025-10-01 04:42:13.546233', 'step': 18098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:13.603222', 'step': 18098, 'epoch': 3} {'type': 'loss', 'content': 0.10755285620689392, 'timestamp': '2025-10-01 04:42:13.605322', 'step': 18099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:13.666696', 'step': 18099, 'epoch': 3} {'type': 'loss', 'content': 0.09345772117376328, 'timestamp': '2025-10-01 04:42:13.673130', 'step': 18100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:13.726780', 'step': 18100, 'epoch': 3} {'type': 'loss', 'content': 0.06415774673223495, 'timestamp': '2025-10-01 04:42:13.730222', 'step': 18101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:13.786946', 'step': 18101, 'epoch': 3} {'type': 'loss', 'content': 0.030829619616270065, 'timestamp': '2025-10-01 04:42:13.792254', 'step': 18102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:13.858956', 'step': 18102, 'epoch': 3} {'type': 'loss', 'content': 0.05526594817638397, 'timestamp': '2025-10-01 04:42:13.861132', 'step': 18103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:13.914955', 'step': 18103, 'epoch': 3} {'type': 'loss', 'content': 0.08991768956184387, 'timestamp': '2025-10-01 04:42:13.921190', 'step': 18104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:13.978350', 'step': 18104, 'epoch': 3} {'type': 'loss', 'content': 0.15507857501506805, 'timestamp': '2025-10-01 04:42:13.980421', 'step': 18105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:14.033668', 'step': 18105, 'epoch': 3} {'type': 'loss', 'content': 0.052716754376888275, 'timestamp': '2025-10-01 04:42:14.035801', 'step': 18106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:14.092821', 'step': 18106, 'epoch': 3} {'type': 'loss', 'content': 0.07303272932767868, 'timestamp': '2025-10-01 04:42:14.096855', 'step': 18107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:14.161809', 'step': 18107, 'epoch': 3} {'type': 'loss', 'content': 0.1312597244977951, 'timestamp': '2025-10-01 04:42:14.167896', 'step': 18108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:14.220771', 'step': 18108, 'epoch': 3} {'type': 'loss', 'content': 0.07472223043441772, 'timestamp': '2025-10-01 04:42:14.224139', 'step': 18109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:14.281607', 'step': 18109, 'epoch': 3} {'type': 'loss', 'content': 0.05372574180364609, 'timestamp': '2025-10-01 04:42:14.284023', 'step': 18110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:14.344307', 'step': 18110, 'epoch': 3} {'type': 'loss', 'content': 0.07137929648160934, 'timestamp': '2025-10-01 04:42:14.346711', 'step': 18111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:14.401133', 'step': 18111, 'epoch': 3} {'type': 'loss', 'content': 0.05311339721083641, 'timestamp': '2025-10-01 04:42:14.408024', 'step': 18112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:14.468315', 'step': 18112, 'epoch': 3} {'type': 'loss', 'content': 0.04321694001555443, 'timestamp': '2025-10-01 04:42:14.470448', 'step': 18113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:42:14.525070', 'step': 18113, 'epoch': 3} {'type': 'loss', 'content': 0.11548754572868347, 'timestamp': '2025-10-01 04:42:14.528078', 'step': 18114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:14.582587', 'step': 18114, 'epoch': 3} {'type': 'loss', 'content': 0.04128163307905197, 'timestamp': '2025-10-01 04:42:14.586605', 'step': 18115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:14.644404', 'step': 18115, 'epoch': 3} {'type': 'loss', 'content': 0.05353470519185066, 'timestamp': '2025-10-01 04:42:14.651080', 'step': 18116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:14.704778', 'step': 18116, 'epoch': 3} {'type': 'loss', 'content': 0.04535369575023651, 'timestamp': '2025-10-01 04:42:14.706923', 'step': 18117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:42:14.760199', 'step': 18117, 'epoch': 3} {'type': 'loss', 'content': 0.04421661049127579, 'timestamp': '2025-10-01 04:42:14.764248', 'step': 18118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:14.818934', 'step': 18118, 'epoch': 3} {'type': 'loss', 'content': 0.10296615213155746, 'timestamp': '2025-10-01 04:42:14.821436', 'step': 18119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:14.875755', 'step': 18119, 'epoch': 3} {'type': 'loss', 'content': 0.0861969143152237, 'timestamp': '2025-10-01 04:42:14.883627', 'step': 18120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:14.939843', 'step': 18120, 'epoch': 3} {'type': 'loss', 'content': 0.11373654752969742, 'timestamp': '2025-10-01 04:42:14.942014', 'step': 18121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:14.997707', 'step': 18121, 'epoch': 3} {'type': 'loss', 'content': 0.13101932406425476, 'timestamp': '2025-10-01 04:42:14.999976', 'step': 18122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:15.053802', 'step': 18122, 'epoch': 3} {'type': 'loss', 'content': 0.12877404689788818, 'timestamp': '2025-10-01 04:42:15.055925', 'step': 18123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:15.109780', 'step': 18123, 'epoch': 3} {'type': 'loss', 'content': 0.1283337026834488, 'timestamp': '2025-10-01 04:42:15.115847', 'step': 18124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:15.169189', 'step': 18124, 'epoch': 3} {'type': 'loss', 'content': 0.08131089061498642, 'timestamp': '2025-10-01 04:42:15.171342', 'step': 18125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:15.224934', 'step': 18125, 'epoch': 3} {'type': 'loss', 'content': 0.08945509791374207, 'timestamp': '2025-10-01 04:42:15.227233', 'step': 18126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:42:15.281145', 'step': 18126, 'epoch': 3} {'type': 'loss', 'content': 0.10824321955442429, 'timestamp': '2025-10-01 04:42:15.283407', 'step': 18127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:15.350811', 'step': 18127, 'epoch': 3} {'type': 'loss', 'content': 0.06768817454576492, 'timestamp': '2025-10-01 04:42:15.356470', 'step': 18128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:15.409167', 'step': 18128, 'epoch': 3} {'type': 'loss', 'content': 0.07641512155532837, 'timestamp': '2025-10-01 04:42:15.411909', 'step': 18129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:15.466765', 'step': 18129, 'epoch': 3} {'type': 'loss', 'content': 0.1490110158920288, 'timestamp': '2025-10-01 04:42:15.469564', 'step': 18130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:15.524396', 'step': 18130, 'epoch': 3} {'type': 'loss', 'content': 0.09239533543586731, 'timestamp': '2025-10-01 04:42:15.526596', 'step': 18131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:15.581633', 'step': 18131, 'epoch': 3} {'type': 'loss', 'content': 0.06698025017976761, 'timestamp': '2025-10-01 04:42:15.587817', 'step': 18132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:15.641995', 'step': 18132, 'epoch': 3} {'type': 'loss', 'content': 0.09582069516181946, 'timestamp': '2025-10-01 04:42:15.645330', 'step': 18133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:15.699612', 'step': 18133, 'epoch': 3} {'type': 'loss', 'content': 0.07128535211086273, 'timestamp': '2025-10-01 04:42:15.701770', 'step': 18134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:15.761411', 'step': 18134, 'epoch': 3} {'type': 'loss', 'content': 0.11338062584400177, 'timestamp': '2025-10-01 04:42:15.763821', 'step': 18135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:15.823621', 'step': 18135, 'epoch': 3} {'type': 'loss', 'content': 0.08930440992116928, 'timestamp': '2025-10-01 04:42:15.831089', 'step': 18136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:15.889856', 'step': 18136, 'epoch': 3} {'type': 'loss', 'content': 0.16001005470752716, 'timestamp': '2025-10-01 04:42:15.892023', 'step': 18137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:15.950893', 'step': 18137, 'epoch': 3} {'type': 'loss', 'content': 0.08748840540647507, 'timestamp': '2025-10-01 04:42:15.954056', 'step': 18138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:16.014774', 'step': 18138, 'epoch': 3} {'type': 'loss', 'content': 0.06803490221500397, 'timestamp': '2025-10-01 04:42:16.016905', 'step': 18139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:16.076875', 'step': 18139, 'epoch': 3} {'type': 'loss', 'content': 0.0744004175066948, 'timestamp': '2025-10-01 04:42:16.084264', 'step': 18140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:16.140381', 'step': 18140, 'epoch': 3} {'type': 'loss', 'content': 0.14282387495040894, 'timestamp': '2025-10-01 04:42:16.142591', 'step': 18141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:16.197351', 'step': 18141, 'epoch': 3} {'type': 'loss', 'content': 0.10854807496070862, 'timestamp': '2025-10-01 04:42:16.199145', 'step': 18142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:16.252429', 'step': 18142, 'epoch': 3} {'type': 'loss', 'content': 0.13765163719654083, 'timestamp': '2025-10-01 04:42:16.254562', 'step': 18143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:42:16.308306', 'step': 18143, 'epoch': 3} {'type': 'loss', 'content': 0.1463921219110489, 'timestamp': '2025-10-01 04:42:16.314293', 'step': 18144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:16.366764', 'step': 18144, 'epoch': 3} {'type': 'loss', 'content': 0.07758178561925888, 'timestamp': '2025-10-01 04:42:16.368908', 'step': 18145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:16.422888', 'step': 18145, 'epoch': 3} {'type': 'loss', 'content': 0.08843784779310226, 'timestamp': '2025-10-01 04:42:16.424988', 'step': 18146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:16.479658', 'step': 18146, 'epoch': 3} {'type': 'loss', 'content': 0.1582133173942566, 'timestamp': '2025-10-01 04:42:16.481889', 'step': 18147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:16.537254', 'step': 18147, 'epoch': 3} {'type': 'loss', 'content': 0.15509501099586487, 'timestamp': '2025-10-01 04:42:16.543808', 'step': 18148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:16.599175', 'step': 18148, 'epoch': 3} {'type': 'loss', 'content': 0.09300632774829865, 'timestamp': '2025-10-01 04:42:16.601300', 'step': 18149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:16.657434', 'step': 18149, 'epoch': 3} {'type': 'loss', 'content': 0.09555549174547195, 'timestamp': '2025-10-01 04:42:16.660667', 'step': 18150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:42:16.716857', 'step': 18150, 'epoch': 3} {'type': 'loss', 'content': 0.12301108986139297, 'timestamp': '2025-10-01 04:42:16.719084', 'step': 18151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:16.775019', 'step': 18151, 'epoch': 3} {'type': 'loss', 'content': 0.06352845579385757, 'timestamp': '2025-10-01 04:42:16.781691', 'step': 18152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:42:16.836903', 'step': 18152, 'epoch': 3} {'type': 'loss', 'content': 0.0344601534307003, 'timestamp': '2025-10-01 04:42:16.838996', 'step': 18153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:16.894202', 'step': 18153, 'epoch': 3} {'type': 'loss', 'content': 0.0953323021531105, 'timestamp': '2025-10-01 04:42:16.896269', 'step': 18154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:42:16.952079', 'step': 18154, 'epoch': 3} {'type': 'loss', 'content': 0.2124256044626236, 'timestamp': '2025-10-01 04:42:16.954440', 'step': 18155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:17.010055', 'step': 18155, 'epoch': 3} {'type': 'loss', 'content': 0.09693340212106705, 'timestamp': '2025-10-01 04:42:17.027727', 'step': 18156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:17.083096', 'step': 18156, 'epoch': 3} {'type': 'loss', 'content': 0.13418039679527283, 'timestamp': '2025-10-01 04:42:17.085182', 'step': 18157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:17.139237', 'step': 18157, 'epoch': 3} {'type': 'loss', 'content': 0.16329680383205414, 'timestamp': '2025-10-01 04:42:17.141328', 'step': 18158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:17.203789', 'step': 18158, 'epoch': 3} {'type': 'loss', 'content': 0.11235926300287247, 'timestamp': '2025-10-01 04:42:17.205989', 'step': 18159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:17.266135', 'step': 18159, 'epoch': 3} {'type': 'loss', 'content': 0.06995487213134766, 'timestamp': '2025-10-01 04:42:17.273602', 'step': 18160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:17.333287', 'step': 18160, 'epoch': 3} {'type': 'loss', 'content': 0.06908978521823883, 'timestamp': '2025-10-01 04:42:17.337038', 'step': 18161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:17.400951', 'step': 18161, 'epoch': 3} {'type': 'loss', 'content': 0.16794879734516144, 'timestamp': '2025-10-01 04:42:17.404670', 'step': 18162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:17.459072', 'step': 18162, 'epoch': 3} {'type': 'loss', 'content': 0.03821073845028877, 'timestamp': '2025-10-01 04:42:17.461369', 'step': 18163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:17.524133', 'step': 18163, 'epoch': 3} {'type': 'loss', 'content': 0.11385861039161682, 'timestamp': '2025-10-01 04:42:17.530447', 'step': 18164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:17.583785', 'step': 18164, 'epoch': 3} {'type': 'loss', 'content': 0.06324757635593414, 'timestamp': '2025-10-01 04:42:17.586018', 'step': 18165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:17.639630', 'step': 18165, 'epoch': 3} {'type': 'loss', 'content': 0.11903642117977142, 'timestamp': '2025-10-01 04:42:17.641788', 'step': 18166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:17.707411', 'step': 18166, 'epoch': 3} {'type': 'loss', 'content': 0.03171754255890846, 'timestamp': '2025-10-01 04:42:17.709545', 'step': 18167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:17.762149', 'step': 18167, 'epoch': 3} {'type': 'loss', 'content': 0.17778249084949493, 'timestamp': '2025-10-01 04:42:17.768241', 'step': 18168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:17.820997', 'step': 18168, 'epoch': 3} {'type': 'loss', 'content': 0.13460509479045868, 'timestamp': '2025-10-01 04:42:17.823196', 'step': 18169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:17.877502', 'step': 18169, 'epoch': 3} {'type': 'loss', 'content': 0.06644190102815628, 'timestamp': '2025-10-01 04:42:17.879813', 'step': 18170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:17.933431', 'step': 18170, 'epoch': 3} {'type': 'loss', 'content': 0.050413284450769424, 'timestamp': '2025-10-01 04:42:17.935573', 'step': 18171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:17.989706', 'step': 18171, 'epoch': 3} {'type': 'loss', 'content': 0.0630299374461174, 'timestamp': '2025-10-01 04:42:17.995635', 'step': 18172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:18.048358', 'step': 18172, 'epoch': 3} {'type': 'loss', 'content': 0.054378148168325424, 'timestamp': '2025-10-01 04:42:18.051570', 'step': 18173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:18.105285', 'step': 18173, 'epoch': 3} {'type': 'loss', 'content': 0.10030876100063324, 'timestamp': '2025-10-01 04:42:18.107703', 'step': 18174, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-01 04:42:31.474491', 'step': 18174, 'epoch': 3} {'type': 'pplx', 'content': 10588.812216207549, 'timestamp': '2025-10-01 04:42:31.477474', 'step': 18174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:31.531868', 'step': 18174, 'epoch': 3} {'type': 'loss', 'content': 0.06773895770311356, 'timestamp': '2025-10-01 04:42:31.534011', 'step': 18175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:31.591329', 'step': 18175, 'epoch': 3} {'type': 'loss', 'content': 0.056996751576662064, 'timestamp': '2025-10-01 04:42:31.600218', 'step': 18176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:31.657698', 'step': 18176, 'epoch': 3} {'type': 'loss', 'content': 0.07543949037790298, 'timestamp': '2025-10-01 04:42:31.660557', 'step': 18177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:31.715275', 'step': 18177, 'epoch': 3} {'type': 'loss', 'content': 0.09584319591522217, 'timestamp': '2025-10-01 04:42:31.717990', 'step': 18178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:31.773039', 'step': 18178, 'epoch': 3} {'type': 'loss', 'content': 0.14746826887130737, 'timestamp': '2025-10-01 04:42:31.775653', 'step': 18179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:31.830007', 'step': 18179, 'epoch': 3} {'type': 'loss', 'content': 0.06261224299669266, 'timestamp': '2025-10-01 04:42:31.836086', 'step': 18180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:31.899730', 'step': 18180, 'epoch': 3} {'type': 'loss', 'content': 0.09912791848182678, 'timestamp': '2025-10-01 04:42:31.902126', 'step': 18181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:31.969069', 'step': 18181, 'epoch': 3} {'type': 'loss', 'content': 0.04105040058493614, 'timestamp': '2025-10-01 04:42:31.975952', 'step': 18182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:32.031169', 'step': 18182, 'epoch': 3} {'type': 'loss', 'content': 0.07979630678892136, 'timestamp': '2025-10-01 04:42:32.033800', 'step': 18183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:42:32.088591', 'step': 18183, 'epoch': 3} {'type': 'loss', 'content': 0.09824827313423157, 'timestamp': '2025-10-01 04:42:32.094598', 'step': 18184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:42:32.148261', 'step': 18184, 'epoch': 3} {'type': 'loss', 'content': 0.08229842782020569, 'timestamp': '2025-10-01 04:42:32.150293', 'step': 18185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:32.205359', 'step': 18185, 'epoch': 3} {'type': 'loss', 'content': 0.19846954941749573, 'timestamp': '2025-10-01 04:42:32.207820', 'step': 18186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:32.263383', 'step': 18186, 'epoch': 3} {'type': 'loss', 'content': 0.12872453033924103, 'timestamp': '2025-10-01 04:42:32.265592', 'step': 18187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:32.319786', 'step': 18187, 'epoch': 3} {'type': 'loss', 'content': 0.08677118271589279, 'timestamp': '2025-10-01 04:42:32.325943', 'step': 18188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:32.379076', 'step': 18188, 'epoch': 3} {'type': 'loss', 'content': 0.13755746185779572, 'timestamp': '2025-10-01 04:42:32.381731', 'step': 18189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:32.436115', 'step': 18189, 'epoch': 3} {'type': 'loss', 'content': 0.09806535392999649, 'timestamp': '2025-10-01 04:42:32.439066', 'step': 18190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:32.496233', 'step': 18190, 'epoch': 3} {'type': 'loss', 'content': 0.07004597783088684, 'timestamp': '2025-10-01 04:42:32.498887', 'step': 18191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:32.553424', 'step': 18191, 'epoch': 3} {'type': 'loss', 'content': 0.016662850975990295, 'timestamp': '2025-10-01 04:42:32.559763', 'step': 18192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:32.614709', 'step': 18192, 'epoch': 3} {'type': 'loss', 'content': 0.12634502351284027, 'timestamp': '2025-10-01 04:42:32.617176', 'step': 18193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:32.671692', 'step': 18193, 'epoch': 3} {'type': 'loss', 'content': 0.07782452553510666, 'timestamp': '2025-10-01 04:42:32.674073', 'step': 18194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:32.728752', 'step': 18194, 'epoch': 3} {'type': 'loss', 'content': 0.04198724776506424, 'timestamp': '2025-10-01 04:42:32.737288', 'step': 18195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:32.800070', 'step': 18195, 'epoch': 3} {'type': 'loss', 'content': 0.08807588368654251, 'timestamp': '2025-10-01 04:42:32.806022', 'step': 18196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:32.859326', 'step': 18196, 'epoch': 3} {'type': 'loss', 'content': 0.07629929482936859, 'timestamp': '2025-10-01 04:42:32.861884', 'step': 18197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:32.915879', 'step': 18197, 'epoch': 3} {'type': 'loss', 'content': 0.06493912637233734, 'timestamp': '2025-10-01 04:42:32.918066', 'step': 18198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:32.972529', 'step': 18198, 'epoch': 3} {'type': 'loss', 'content': 0.16370101273059845, 'timestamp': '2025-10-01 04:42:32.975416', 'step': 18199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:33.029285', 'step': 18199, 'epoch': 3} {'type': 'loss', 'content': 0.12598291039466858, 'timestamp': '2025-10-01 04:42:33.035367', 'step': 18200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:42:33.090718', 'step': 18200, 'epoch': 3} {'type': 'loss', 'content': 0.08817928284406662, 'timestamp': '2025-10-01 04:42:33.092876', 'step': 18201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:33.146798', 'step': 18201, 'epoch': 3} {'type': 'loss', 'content': 0.04898788407444954, 'timestamp': '2025-10-01 04:42:33.148973', 'step': 18202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:33.203012', 'step': 18202, 'epoch': 3} {'type': 'loss', 'content': 0.15749816596508026, 'timestamp': '2025-10-01 04:42:33.206012', 'step': 18203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:33.261371', 'step': 18203, 'epoch': 3} {'type': 'loss', 'content': 0.11239876598119736, 'timestamp': '2025-10-01 04:42:33.267293', 'step': 18204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:33.320849', 'step': 18204, 'epoch': 3} {'type': 'loss', 'content': 0.17045503854751587, 'timestamp': '2025-10-01 04:42:33.323129', 'step': 18205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:33.376719', 'step': 18205, 'epoch': 3} {'type': 'loss', 'content': 0.07882201671600342, 'timestamp': '2025-10-01 04:42:33.379077', 'step': 18206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:33.450142', 'step': 18206, 'epoch': 3} {'type': 'loss', 'content': 0.10887633264064789, 'timestamp': '2025-10-01 04:42:33.452629', 'step': 18207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:33.506713', 'step': 18207, 'epoch': 3} {'type': 'loss', 'content': 0.13153892755508423, 'timestamp': '2025-10-01 04:42:33.512359', 'step': 18208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:42:33.575129', 'step': 18208, 'epoch': 3} {'type': 'loss', 'content': 0.09972335398197174, 'timestamp': '2025-10-01 04:42:33.577471', 'step': 18209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:33.631104', 'step': 18209, 'epoch': 3} {'type': 'loss', 'content': 0.2117546945810318, 'timestamp': '2025-10-01 04:42:33.633280', 'step': 18210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:33.688979', 'step': 18210, 'epoch': 3} {'type': 'loss', 'content': 0.10994362086057663, 'timestamp': '2025-10-01 04:42:33.691237', 'step': 18211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:33.744820', 'step': 18211, 'epoch': 3} {'type': 'loss', 'content': 0.010293730534613132, 'timestamp': '2025-10-01 04:42:33.750675', 'step': 18212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:33.803608', 'step': 18212, 'epoch': 3} {'type': 'loss', 'content': 0.14756566286087036, 'timestamp': '2025-10-01 04:42:33.811982', 'step': 18213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:33.865836', 'step': 18213, 'epoch': 3} {'type': 'loss', 'content': 0.12919220328330994, 'timestamp': '2025-10-01 04:42:33.868097', 'step': 18214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:33.922184', 'step': 18214, 'epoch': 3} {'type': 'loss', 'content': 0.04224526509642601, 'timestamp': '2025-10-01 04:42:33.926607', 'step': 18215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:42:33.980302', 'step': 18215, 'epoch': 3} {'type': 'loss', 'content': 0.07981812208890915, 'timestamp': '2025-10-01 04:42:33.986161', 'step': 18216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:34.039776', 'step': 18216, 'epoch': 3} {'type': 'loss', 'content': 0.09105517715215683, 'timestamp': '2025-10-01 04:42:34.042812', 'step': 18217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:34.095675', 'step': 18217, 'epoch': 3} {'type': 'loss', 'content': 0.09270443767309189, 'timestamp': '2025-10-01 04:42:34.098785', 'step': 18218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:34.152638', 'step': 18218, 'epoch': 3} {'type': 'loss', 'content': 0.044038716703653336, 'timestamp': '2025-10-01 04:42:34.154772', 'step': 18219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:34.208701', 'step': 18219, 'epoch': 3} {'type': 'loss', 'content': 0.09697737544775009, 'timestamp': '2025-10-01 04:42:34.214550', 'step': 18220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:34.269642', 'step': 18220, 'epoch': 3} {'type': 'loss', 'content': 0.08591359108686447, 'timestamp': '2025-10-01 04:42:34.271888', 'step': 18221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:34.340015', 'step': 18221, 'epoch': 3} {'type': 'loss', 'content': 0.1439291387796402, 'timestamp': '2025-10-01 04:42:34.342121', 'step': 18222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:34.395867', 'step': 18222, 'epoch': 3} {'type': 'loss', 'content': 0.09030523896217346, 'timestamp': '2025-10-01 04:42:34.397969', 'step': 18223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:34.451768', 'step': 18223, 'epoch': 3} {'type': 'loss', 'content': 0.059048935770988464, 'timestamp': '2025-10-01 04:42:34.457460', 'step': 18224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:34.511743', 'step': 18224, 'epoch': 3} {'type': 'loss', 'content': 0.13026030361652374, 'timestamp': '2025-10-01 04:42:34.514351', 'step': 18225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:34.573228', 'step': 18225, 'epoch': 3} {'type': 'loss', 'content': 0.059501249343156815, 'timestamp': '2025-10-01 04:42:34.575480', 'step': 18226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:34.634356', 'step': 18226, 'epoch': 3} {'type': 'loss', 'content': 0.11561332643032074, 'timestamp': '2025-10-01 04:42:34.636483', 'step': 18227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:34.690271', 'step': 18227, 'epoch': 3} {'type': 'loss', 'content': 0.10843171924352646, 'timestamp': '2025-10-01 04:42:34.696690', 'step': 18228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:34.750390', 'step': 18228, 'epoch': 3} {'type': 'loss', 'content': 0.10484465211629868, 'timestamp': '2025-10-01 04:42:34.752546', 'step': 18229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:34.806434', 'step': 18229, 'epoch': 3} {'type': 'loss', 'content': 0.06492579728364944, 'timestamp': '2025-10-01 04:42:34.809868', 'step': 18230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:34.863988', 'step': 18230, 'epoch': 3} {'type': 'loss', 'content': 0.11490325629711151, 'timestamp': '2025-10-01 04:42:34.866194', 'step': 18231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:34.934990', 'step': 18231, 'epoch': 3} {'type': 'loss', 'content': 0.07676316797733307, 'timestamp': '2025-10-01 04:42:34.940750', 'step': 18232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:34.993145', 'step': 18232, 'epoch': 3} {'type': 'loss', 'content': 0.054944079369306564, 'timestamp': '2025-10-01 04:42:34.995293', 'step': 18233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:35.049225', 'step': 18233, 'epoch': 3} {'type': 'loss', 'content': 0.09684035181999207, 'timestamp': '2025-10-01 04:42:35.051943', 'step': 18234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:35.109139', 'step': 18234, 'epoch': 3} {'type': 'loss', 'content': 0.08350799232721329, 'timestamp': '2025-10-01 04:42:35.111486', 'step': 18235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:35.166005', 'step': 18235, 'epoch': 3} {'type': 'loss', 'content': 0.09087226539850235, 'timestamp': '2025-10-01 04:42:35.172089', 'step': 18236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:35.225134', 'step': 18236, 'epoch': 3} {'type': 'loss', 'content': 0.047219522297382355, 'timestamp': '2025-10-01 04:42:35.227325', 'step': 18237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:35.280793', 'step': 18237, 'epoch': 3} {'type': 'loss', 'content': 0.11760830879211426, 'timestamp': '2025-10-01 04:42:35.282982', 'step': 18238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:35.338562', 'step': 18238, 'epoch': 3} {'type': 'loss', 'content': 0.12372439354658127, 'timestamp': '2025-10-01 04:42:35.340785', 'step': 18239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:35.395670', 'step': 18239, 'epoch': 3} {'type': 'loss', 'content': 0.049590665847063065, 'timestamp': '2025-10-01 04:42:35.401429', 'step': 18240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:35.454328', 'step': 18240, 'epoch': 3} {'type': 'loss', 'content': 0.1313350945711136, 'timestamp': '2025-10-01 04:42:35.456454', 'step': 18241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:35.509930', 'step': 18241, 'epoch': 3} {'type': 'loss', 'content': 0.05836183950304985, 'timestamp': '2025-10-01 04:42:35.512276', 'step': 18242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:42:35.566948', 'step': 18242, 'epoch': 3} {'type': 'loss', 'content': 0.015832211822271347, 'timestamp': '2025-10-01 04:42:35.570282', 'step': 18243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:35.624579', 'step': 18243, 'epoch': 3} {'type': 'loss', 'content': 0.05863795801997185, 'timestamp': '2025-10-01 04:42:35.640878', 'step': 18244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:35.699952', 'step': 18244, 'epoch': 3} {'type': 'loss', 'content': 0.14536404609680176, 'timestamp': '2025-10-01 04:42:35.702182', 'step': 18245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:35.768257', 'step': 18245, 'epoch': 3} {'type': 'loss', 'content': 0.07061665505170822, 'timestamp': '2025-10-01 04:42:35.770341', 'step': 18246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:35.823952', 'step': 18246, 'epoch': 3} {'type': 'loss', 'content': 0.126633420586586, 'timestamp': '2025-10-01 04:42:35.826044', 'step': 18247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:42:35.879540', 'step': 18247, 'epoch': 3} {'type': 'loss', 'content': 0.1171790286898613, 'timestamp': '2025-10-01 04:42:35.885405', 'step': 18248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:35.939255', 'step': 18248, 'epoch': 3} {'type': 'loss', 'content': 0.13072438538074493, 'timestamp': '2025-10-01 04:42:35.941676', 'step': 18249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:35.995539', 'step': 18249, 'epoch': 3} {'type': 'loss', 'content': 0.07346583902835846, 'timestamp': '2025-10-01 04:42:35.997819', 'step': 18250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:36.051692', 'step': 18250, 'epoch': 3} {'type': 'loss', 'content': 0.046378243714571, 'timestamp': '2025-10-01 04:42:36.054238', 'step': 18251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:36.107918', 'step': 18251, 'epoch': 3} {'type': 'loss', 'content': 0.08530591428279877, 'timestamp': '2025-10-01 04:42:36.124017', 'step': 18252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:36.177784', 'step': 18252, 'epoch': 3} {'type': 'loss', 'content': 0.04194450005888939, 'timestamp': '2025-10-01 04:42:36.179915', 'step': 18253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:36.239607', 'step': 18253, 'epoch': 3} {'type': 'loss', 'content': 0.09076730906963348, 'timestamp': '2025-10-01 04:42:36.241768', 'step': 18254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:42:36.304362', 'step': 18254, 'epoch': 3} {'type': 'loss', 'content': 0.037064068019390106, 'timestamp': '2025-10-01 04:42:36.306484', 'step': 18255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:36.359299', 'step': 18255, 'epoch': 3} {'type': 'loss', 'content': 0.11021733283996582, 'timestamp': '2025-10-01 04:42:36.365271', 'step': 18256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:36.418075', 'step': 18256, 'epoch': 3} {'type': 'loss', 'content': 0.0522887147963047, 'timestamp': '2025-10-01 04:42:36.420279', 'step': 18257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:36.473665', 'step': 18257, 'epoch': 3} {'type': 'loss', 'content': 0.061296332627534866, 'timestamp': '2025-10-01 04:42:36.475804', 'step': 18258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:36.529717', 'step': 18258, 'epoch': 3} {'type': 'loss', 'content': 0.1685219556093216, 'timestamp': '2025-10-01 04:42:36.531832', 'step': 18259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-01 04:42:36.601634', 'step': 18259, 'epoch': 3} {'type': 'loss', 'content': 0.0854446068406105, 'timestamp': '2025-10-01 04:42:36.614830', 'step': 18260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:36.669240', 'step': 18260, 'epoch': 3} {'type': 'loss', 'content': 0.08267267793416977, 'timestamp': '2025-10-01 04:42:36.671595', 'step': 18261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:36.725310', 'step': 18261, 'epoch': 3} {'type': 'loss', 'content': 0.1648654192686081, 'timestamp': '2025-10-01 04:42:36.727772', 'step': 18262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:36.781601', 'step': 18262, 'epoch': 3} {'type': 'loss', 'content': 0.09925256669521332, 'timestamp': '2025-10-01 04:42:36.783945', 'step': 18263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:36.845254', 'step': 18263, 'epoch': 3} {'type': 'loss', 'content': 0.12099365890026093, 'timestamp': '2025-10-01 04:42:36.851082', 'step': 18264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:36.914711', 'step': 18264, 'epoch': 3} {'type': 'loss', 'content': 0.12677977979183197, 'timestamp': '2025-10-01 04:42:36.917235', 'step': 18265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:36.982897', 'step': 18265, 'epoch': 3} {'type': 'loss', 'content': 0.10845575481653214, 'timestamp': '2025-10-01 04:42:36.985072', 'step': 18266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:37.038280', 'step': 18266, 'epoch': 3} {'type': 'loss', 'content': 0.06933680921792984, 'timestamp': '2025-10-01 04:42:37.040363', 'step': 18267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:37.099718', 'step': 18267, 'epoch': 3} {'type': 'loss', 'content': 0.08800258487462997, 'timestamp': '2025-10-01 04:42:37.110011', 'step': 18268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:37.182615', 'step': 18268, 'epoch': 3} {'type': 'loss', 'content': 0.1389496624469757, 'timestamp': '2025-10-01 04:42:37.184798', 'step': 18269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:37.238350', 'step': 18269, 'epoch': 3} {'type': 'loss', 'content': 0.05765075981616974, 'timestamp': '2025-10-01 04:42:37.247770', 'step': 18270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:37.301282', 'step': 18270, 'epoch': 3} {'type': 'loss', 'content': 0.06748604029417038, 'timestamp': '2025-10-01 04:42:37.303968', 'step': 18271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:37.358271', 'step': 18271, 'epoch': 3} {'type': 'loss', 'content': 0.07143200933933258, 'timestamp': '2025-10-01 04:42:37.364106', 'step': 18272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:42:37.417833', 'step': 18272, 'epoch': 3} {'type': 'loss', 'content': 0.049733784049749374, 'timestamp': '2025-10-01 04:42:37.420399', 'step': 18273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:37.475129', 'step': 18273, 'epoch': 3} {'type': 'loss', 'content': 0.068785160779953, 'timestamp': '2025-10-01 04:42:37.477695', 'step': 18274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:37.531417', 'step': 18274, 'epoch': 3} {'type': 'loss', 'content': 0.14785198867321014, 'timestamp': '2025-10-01 04:42:37.535032', 'step': 18275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:37.590921', 'step': 18275, 'epoch': 3} {'type': 'loss', 'content': 0.11396089941263199, 'timestamp': '2025-10-01 04:42:37.596916', 'step': 18276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:37.662193', 'step': 18276, 'epoch': 3} {'type': 'loss', 'content': 0.09852380305528641, 'timestamp': '2025-10-01 04:42:37.664371', 'step': 18277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:37.718752', 'step': 18277, 'epoch': 3} {'type': 'loss', 'content': 0.060061462223529816, 'timestamp': '2025-10-01 04:42:37.721066', 'step': 18278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:37.784548', 'step': 18278, 'epoch': 3} {'type': 'loss', 'content': 0.05301518365740776, 'timestamp': '2025-10-01 04:42:37.786893', 'step': 18279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:37.840839', 'step': 18279, 'epoch': 3} {'type': 'loss', 'content': 0.13886044919490814, 'timestamp': '2025-10-01 04:42:37.846995', 'step': 18280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:37.900437', 'step': 18280, 'epoch': 3} {'type': 'loss', 'content': 0.0952659472823143, 'timestamp': '2025-10-01 04:42:37.911875', 'step': 18281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:37.985343', 'step': 18281, 'epoch': 3} {'type': 'loss', 'content': 0.09950418025255203, 'timestamp': '2025-10-01 04:42:37.987544', 'step': 18282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:38.040925', 'step': 18282, 'epoch': 3} {'type': 'loss', 'content': 0.06042615324258804, 'timestamp': '2025-10-01 04:42:38.043122', 'step': 18283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:38.098038', 'step': 18283, 'epoch': 3} {'type': 'loss', 'content': 0.05321331322193146, 'timestamp': '2025-10-01 04:42:38.103735', 'step': 18284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:38.157106', 'step': 18284, 'epoch': 3} {'type': 'loss', 'content': 0.16265800595283508, 'timestamp': '2025-10-01 04:42:38.159400', 'step': 18285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:38.213305', 'step': 18285, 'epoch': 3} {'type': 'loss', 'content': 0.02743409015238285, 'timestamp': '2025-10-01 04:42:38.215478', 'step': 18286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:38.269760', 'step': 18286, 'epoch': 3} {'type': 'loss', 'content': 0.11210978776216507, 'timestamp': '2025-10-01 04:42:38.273655', 'step': 18287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:42:38.328696', 'step': 18287, 'epoch': 3} {'type': 'loss', 'content': 0.06635066866874695, 'timestamp': '2025-10-01 04:42:38.334549', 'step': 18288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:38.388355', 'step': 18288, 'epoch': 3} {'type': 'loss', 'content': 0.10317040234804153, 'timestamp': '2025-10-01 04:42:38.390750', 'step': 18289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:38.444548', 'step': 18289, 'epoch': 3} {'type': 'loss', 'content': 0.06309723109006882, 'timestamp': '2025-10-01 04:42:38.446852', 'step': 18290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:38.503850', 'step': 18290, 'epoch': 3} {'type': 'loss', 'content': 0.13176189363002777, 'timestamp': '2025-10-01 04:42:38.506032', 'step': 18291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:38.559662', 'step': 18291, 'epoch': 3} {'type': 'loss', 'content': 0.08250224590301514, 'timestamp': '2025-10-01 04:42:38.569867', 'step': 18292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:38.623633', 'step': 18292, 'epoch': 3} {'type': 'loss', 'content': 0.07949007302522659, 'timestamp': '2025-10-01 04:42:38.630966', 'step': 18293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:42:38.684767', 'step': 18293, 'epoch': 3} {'type': 'loss', 'content': 0.10629799216985703, 'timestamp': '2025-10-01 04:42:38.686885', 'step': 18294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:38.740576', 'step': 18294, 'epoch': 3} {'type': 'loss', 'content': 0.051417794078588486, 'timestamp': '2025-10-01 04:42:38.743395', 'step': 18295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:38.797467', 'step': 18295, 'epoch': 3} {'type': 'loss', 'content': 0.1670176386833191, 'timestamp': '2025-10-01 04:42:38.803197', 'step': 18296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:38.855741', 'step': 18296, 'epoch': 3} {'type': 'loss', 'content': 0.07699494063854218, 'timestamp': '2025-10-01 04:42:38.857875', 'step': 18297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:38.911485', 'step': 18297, 'epoch': 3} {'type': 'loss', 'content': 0.11783482879400253, 'timestamp': '2025-10-01 04:42:38.913597', 'step': 18298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:38.967450', 'step': 18298, 'epoch': 3} {'type': 'loss', 'content': 0.06370270252227783, 'timestamp': '2025-10-01 04:42:38.969665', 'step': 18299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:39.023047', 'step': 18299, 'epoch': 3} {'type': 'loss', 'content': 0.11795582622289658, 'timestamp': '2025-10-01 04:42:39.029125', 'step': 18300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:39.083140', 'step': 18300, 'epoch': 3} {'type': 'loss', 'content': 0.10388706624507904, 'timestamp': '2025-10-01 04:42:39.085264', 'step': 18301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:39.141506', 'step': 18301, 'epoch': 3} {'type': 'loss', 'content': 0.17034804821014404, 'timestamp': '2025-10-01 04:42:39.143682', 'step': 18302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:39.197636', 'step': 18302, 'epoch': 3} {'type': 'loss', 'content': 0.09380961954593658, 'timestamp': '2025-10-01 04:42:39.199884', 'step': 18303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:39.253297', 'step': 18303, 'epoch': 3} {'type': 'loss', 'content': 0.046035487204790115, 'timestamp': '2025-10-01 04:42:39.259081', 'step': 18304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:39.312222', 'step': 18304, 'epoch': 3} {'type': 'loss', 'content': 0.08782575279474258, 'timestamp': '2025-10-01 04:42:39.314354', 'step': 18305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:39.367846', 'step': 18305, 'epoch': 3} {'type': 'loss', 'content': 0.09996599704027176, 'timestamp': '2025-10-01 04:42:39.370489', 'step': 18306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:39.425061', 'step': 18306, 'epoch': 3} {'type': 'loss', 'content': 0.08833532780408859, 'timestamp': '2025-10-01 04:42:39.432658', 'step': 18307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:39.488203', 'step': 18307, 'epoch': 3} {'type': 'loss', 'content': 0.04806165397167206, 'timestamp': '2025-10-01 04:42:39.494528', 'step': 18308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:39.548644', 'step': 18308, 'epoch': 3} {'type': 'loss', 'content': 0.09182926267385483, 'timestamp': '2025-10-01 04:42:39.553948', 'step': 18309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:39.608486', 'step': 18309, 'epoch': 3} {'type': 'loss', 'content': 0.12248112261295319, 'timestamp': '2025-10-01 04:42:39.611444', 'step': 18310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:39.668476', 'step': 18310, 'epoch': 3} {'type': 'loss', 'content': 0.12277870625257492, 'timestamp': '2025-10-01 04:42:39.670595', 'step': 18311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:39.736693', 'step': 18311, 'epoch': 3} {'type': 'loss', 'content': 0.10583250224590302, 'timestamp': '2025-10-01 04:42:39.742989', 'step': 18312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:39.796420', 'step': 18312, 'epoch': 3} {'type': 'loss', 'content': 0.07500606030225754, 'timestamp': '2025-10-01 04:42:39.798522', 'step': 18313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:39.852971', 'step': 18313, 'epoch': 3} {'type': 'loss', 'content': 0.05508840084075928, 'timestamp': '2025-10-01 04:42:39.855094', 'step': 18314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:39.909242', 'step': 18314, 'epoch': 3} {'type': 'loss', 'content': 0.04276299104094505, 'timestamp': '2025-10-01 04:42:39.911363', 'step': 18315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:39.964833', 'step': 18315, 'epoch': 3} {'type': 'loss', 'content': 0.09067807346582413, 'timestamp': '2025-10-01 04:42:39.971051', 'step': 18316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:40.038880', 'step': 18316, 'epoch': 3} {'type': 'loss', 'content': 0.10674826055765152, 'timestamp': '2025-10-01 04:42:40.041103', 'step': 18317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:40.113691', 'step': 18317, 'epoch': 3} {'type': 'loss', 'content': 0.1327163577079773, 'timestamp': '2025-10-01 04:42:40.117394', 'step': 18318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:40.171396', 'step': 18318, 'epoch': 3} {'type': 'loss', 'content': 0.11963316053152084, 'timestamp': '2025-10-01 04:42:40.173759', 'step': 18319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:40.227346', 'step': 18319, 'epoch': 3} {'type': 'loss', 'content': 0.0671931579709053, 'timestamp': '2025-10-01 04:42:40.236908', 'step': 18320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:40.294523', 'step': 18320, 'epoch': 3} {'type': 'loss', 'content': 0.0797484889626503, 'timestamp': '2025-10-01 04:42:40.296882', 'step': 18321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:40.350015', 'step': 18321, 'epoch': 3} {'type': 'loss', 'content': 0.09902016073465347, 'timestamp': '2025-10-01 04:42:40.354333', 'step': 18322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:40.409755', 'step': 18322, 'epoch': 3} {'type': 'loss', 'content': 0.13087056577205658, 'timestamp': '2025-10-01 04:42:40.411959', 'step': 18323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:40.465828', 'step': 18323, 'epoch': 3} {'type': 'loss', 'content': 0.10573175549507141, 'timestamp': '2025-10-01 04:42:40.471965', 'step': 18324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:40.524814', 'step': 18324, 'epoch': 3} {'type': 'loss', 'content': 0.0871925875544548, 'timestamp': '2025-10-01 04:42:40.527005', 'step': 18325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:40.581584', 'step': 18325, 'epoch': 3} {'type': 'loss', 'content': 0.06745517998933792, 'timestamp': '2025-10-01 04:42:40.583783', 'step': 18326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:42:40.639474', 'step': 18326, 'epoch': 3} {'type': 'loss', 'content': 0.12836207449436188, 'timestamp': '2025-10-01 04:42:40.642688', 'step': 18327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:40.698260', 'step': 18327, 'epoch': 3} {'type': 'loss', 'content': 0.12229321151971817, 'timestamp': '2025-10-01 04:42:40.704560', 'step': 18328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:40.759208', 'step': 18328, 'epoch': 3} {'type': 'loss', 'content': 0.0592803880572319, 'timestamp': '2025-10-01 04:42:40.761585', 'step': 18329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:40.820279', 'step': 18329, 'epoch': 3} {'type': 'loss', 'content': 0.048004716634750366, 'timestamp': '2025-10-01 04:42:40.822455', 'step': 18330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:40.894800', 'step': 18330, 'epoch': 3} {'type': 'loss', 'content': 0.04246427118778229, 'timestamp': '2025-10-01 04:42:40.897239', 'step': 18331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:40.954491', 'step': 18331, 'epoch': 3} {'type': 'loss', 'content': 0.14600279927253723, 'timestamp': '2025-10-01 04:42:40.961040', 'step': 18332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:41.016154', 'step': 18332, 'epoch': 3} {'type': 'loss', 'content': 0.1356237381696701, 'timestamp': '2025-10-01 04:42:41.018674', 'step': 18333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:41.073970', 'step': 18333, 'epoch': 3} {'type': 'loss', 'content': 0.043982040137052536, 'timestamp': '2025-10-01 04:42:41.076247', 'step': 18334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:41.131727', 'step': 18334, 'epoch': 3} {'type': 'loss', 'content': 0.10584839433431625, 'timestamp': '2025-10-01 04:42:41.134496', 'step': 18335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:41.190856', 'step': 18335, 'epoch': 3} {'type': 'loss', 'content': 0.10272687673568726, 'timestamp': '2025-10-01 04:42:41.207598', 'step': 18336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:41.272876', 'step': 18336, 'epoch': 3} {'type': 'loss', 'content': 0.029263142496347427, 'timestamp': '2025-10-01 04:42:41.275610', 'step': 18337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:41.330468', 'step': 18337, 'epoch': 3} {'type': 'loss', 'content': 0.07396118342876434, 'timestamp': '2025-10-01 04:42:41.332784', 'step': 18338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:41.387681', 'step': 18338, 'epoch': 3} {'type': 'loss', 'content': 0.07437511533498764, 'timestamp': '2025-10-01 04:42:41.390330', 'step': 18339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:41.444168', 'step': 18339, 'epoch': 3} {'type': 'loss', 'content': 0.027817964553833008, 'timestamp': '2025-10-01 04:42:41.450599', 'step': 18340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:41.504878', 'step': 18340, 'epoch': 3} {'type': 'loss', 'content': 0.0855151116847992, 'timestamp': '2025-10-01 04:42:41.507311', 'step': 18341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:41.562828', 'step': 18341, 'epoch': 3} {'type': 'loss', 'content': 0.08253123611211777, 'timestamp': '2025-10-01 04:42:41.565288', 'step': 18342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:42:41.620375', 'step': 18342, 'epoch': 3} {'type': 'loss', 'content': 0.11399022489786148, 'timestamp': '2025-10-01 04:42:41.622602', 'step': 18343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:41.676836', 'step': 18343, 'epoch': 3} {'type': 'loss', 'content': 0.08944344520568848, 'timestamp': '2025-10-01 04:42:41.683126', 'step': 18344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:41.736837', 'step': 18344, 'epoch': 3} {'type': 'loss', 'content': 0.05567243695259094, 'timestamp': '2025-10-01 04:42:41.739180', 'step': 18345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:41.792902', 'step': 18345, 'epoch': 3} {'type': 'loss', 'content': 0.04373897984623909, 'timestamp': '2025-10-01 04:42:41.795018', 'step': 18346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:41.849281', 'step': 18346, 'epoch': 3} {'type': 'loss', 'content': 0.18298088014125824, 'timestamp': '2025-10-01 04:42:41.852967', 'step': 18347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:41.908243', 'step': 18347, 'epoch': 3} {'type': 'loss', 'content': 0.17228297889232635, 'timestamp': '2025-10-01 04:42:41.914787', 'step': 18348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:41.968750', 'step': 18348, 'epoch': 3} {'type': 'loss', 'content': 0.04112483188509941, 'timestamp': '2025-10-01 04:42:41.982117', 'step': 18349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:42.037613', 'step': 18349, 'epoch': 3} {'type': 'loss', 'content': 0.13975635170936584, 'timestamp': '2025-10-01 04:42:42.040025', 'step': 18350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:42.094528', 'step': 18350, 'epoch': 3} {'type': 'loss', 'content': 0.12402518093585968, 'timestamp': '2025-10-01 04:42:42.096867', 'step': 18351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:42.150962', 'step': 18351, 'epoch': 3} {'type': 'loss', 'content': 0.0845128744840622, 'timestamp': '2025-10-01 04:42:42.157105', 'step': 18352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:42.210564', 'step': 18352, 'epoch': 3} {'type': 'loss', 'content': 0.05216549336910248, 'timestamp': '2025-10-01 04:42:42.212836', 'step': 18353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:42.267675', 'step': 18353, 'epoch': 3} {'type': 'loss', 'content': 0.10085778683423996, 'timestamp': '2025-10-01 04:42:42.269817', 'step': 18354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:42.323441', 'step': 18354, 'epoch': 3} {'type': 'loss', 'content': 0.05777287110686302, 'timestamp': '2025-10-01 04:42:42.336412', 'step': 18355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:42.392185', 'step': 18355, 'epoch': 3} {'type': 'loss', 'content': 0.16587121784687042, 'timestamp': '2025-10-01 04:42:42.398044', 'step': 18356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:42:42.451102', 'step': 18356, 'epoch': 3} {'type': 'loss', 'content': 0.06361570209264755, 'timestamp': '2025-10-01 04:42:42.453256', 'step': 18357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:42.506768', 'step': 18357, 'epoch': 3} {'type': 'loss', 'content': 0.15959586203098297, 'timestamp': '2025-10-01 04:42:42.508886', 'step': 18358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:42.562618', 'step': 18358, 'epoch': 3} {'type': 'loss', 'content': 0.051616132259368896, 'timestamp': '2025-10-01 04:42:42.564713', 'step': 18359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:42.618417', 'step': 18359, 'epoch': 3} {'type': 'loss', 'content': 0.0731746181845665, 'timestamp': '2025-10-01 04:42:42.624146', 'step': 18360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:42.677157', 'step': 18360, 'epoch': 3} {'type': 'loss', 'content': 0.0751275047659874, 'timestamp': '2025-10-01 04:42:42.679295', 'step': 18361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:42.732528', 'step': 18361, 'epoch': 3} {'type': 'loss', 'content': 0.14447830617427826, 'timestamp': '2025-10-01 04:42:42.734769', 'step': 18362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:42.788691', 'step': 18362, 'epoch': 3} {'type': 'loss', 'content': 0.08404308557510376, 'timestamp': '2025-10-01 04:42:42.790798', 'step': 18363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:42:42.844932', 'step': 18363, 'epoch': 3} {'type': 'loss', 'content': 0.051099274307489395, 'timestamp': '2025-10-01 04:42:42.850894', 'step': 18364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:42.903927', 'step': 18364, 'epoch': 3} {'type': 'loss', 'content': 0.10020043700933456, 'timestamp': '2025-10-01 04:42:42.906289', 'step': 18365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:42.959518', 'step': 18365, 'epoch': 3} {'type': 'loss', 'content': 0.08866722881793976, 'timestamp': '2025-10-01 04:42:42.962700', 'step': 18366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:43.016653', 'step': 18366, 'epoch': 3} {'type': 'loss', 'content': 0.059196244925260544, 'timestamp': '2025-10-01 04:42:43.018840', 'step': 18367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:43.071932', 'step': 18367, 'epoch': 3} {'type': 'loss', 'content': 0.12393122911453247, 'timestamp': '2025-10-01 04:42:43.077626', 'step': 18368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:43.130405', 'step': 18368, 'epoch': 3} {'type': 'loss', 'content': 0.1175752803683281, 'timestamp': '2025-10-01 04:42:43.132542', 'step': 18369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:43.185726', 'step': 18369, 'epoch': 3} {'type': 'loss', 'content': 0.04558103159070015, 'timestamp': '2025-10-01 04:42:43.188590', 'step': 18370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:43.243917', 'step': 18370, 'epoch': 3} {'type': 'loss', 'content': 0.20836709439754486, 'timestamp': '2025-10-01 04:42:43.246112', 'step': 18371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:43.299542', 'step': 18371, 'epoch': 3} {'type': 'loss', 'content': 0.14987137913703918, 'timestamp': '2025-10-01 04:42:43.305507', 'step': 18372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:43.370151', 'step': 18372, 'epoch': 3} {'type': 'loss', 'content': 0.09854746609926224, 'timestamp': '2025-10-01 04:42:43.372160', 'step': 18373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:43.425004', 'step': 18373, 'epoch': 3} {'type': 'loss', 'content': 0.10561945289373398, 'timestamp': '2025-10-01 04:42:43.427138', 'step': 18374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:43.483047', 'step': 18374, 'epoch': 3} {'type': 'loss', 'content': 0.07654950767755508, 'timestamp': '2025-10-01 04:42:43.485174', 'step': 18375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:43.539275', 'step': 18375, 'epoch': 3} {'type': 'loss', 'content': 0.12797358632087708, 'timestamp': '2025-10-01 04:42:43.545058', 'step': 18376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:43.597804', 'step': 18376, 'epoch': 3} {'type': 'loss', 'content': 0.03490541875362396, 'timestamp': '2025-10-01 04:42:43.599931', 'step': 18377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:43.653454', 'step': 18377, 'epoch': 3} {'type': 'loss', 'content': 0.049558717757463455, 'timestamp': '2025-10-01 04:42:43.655709', 'step': 18378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:43.709433', 'step': 18378, 'epoch': 3} {'type': 'loss', 'content': 0.12270752340555191, 'timestamp': '2025-10-01 04:42:43.711883', 'step': 18379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:43.765114', 'step': 18379, 'epoch': 3} {'type': 'loss', 'content': 0.1402530074119568, 'timestamp': '2025-10-01 04:42:43.772057', 'step': 18380, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:43.825316', 'step': 18380, 'epoch': 3} {'type': 'loss', 'content': 0.09066635370254517, 'timestamp': '2025-10-01 04:42:43.827507', 'step': 18381, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:43.893953', 'step': 18381, 'epoch': 3} {'type': 'loss', 'content': 0.045372024178504944, 'timestamp': '2025-10-01 04:42:43.896233', 'step': 18382, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:43.949701', 'step': 18382, 'epoch': 3} {'type': 'loss', 'content': 0.09409977495670319, 'timestamp': '2025-10-01 04:42:43.951809', 'step': 18383, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:44.005066', 'step': 18383, 'epoch': 3} {'type': 'loss', 'content': 0.10124542564153671, 'timestamp': '2025-10-01 04:42:44.010742', 'step': 18384, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:44.063680', 'step': 18384, 'epoch': 3} {'type': 'loss', 'content': 0.10697514563798904, 'timestamp': '2025-10-01 04:42:44.065831', 'step': 18385, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:44.119788', 'step': 18385, 'epoch': 3} {'type': 'loss', 'content': 0.07256031036376953, 'timestamp': '2025-10-01 04:42:44.123311', 'step': 18386, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:44.176831', 'step': 18386, 'epoch': 3} {'type': 'loss', 'content': 0.13919682800769806, 'timestamp': '2025-10-01 04:42:44.178849', 'step': 18387, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:44.232342', 'step': 18387, 'epoch': 3} {'type': 'loss', 'content': 0.05583786964416504, 'timestamp': '2025-10-01 04:42:44.238096', 'step': 18388, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:44.308060', 'step': 18388, 'epoch': 3} {'type': 'loss', 'content': 0.09634634107351303, 'timestamp': '2025-10-01 04:42:44.310201', 'step': 18389, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:44.363601', 'step': 18389, 'epoch': 3} {'type': 'loss', 'content': 0.07779605686664581, 'timestamp': '2025-10-01 04:42:44.367160', 'step': 18390, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:44.420962', 'step': 18390, 'epoch': 3} {'type': 'loss', 'content': 0.15932315587997437, 'timestamp': '2025-10-01 04:42:44.422910', 'step': 18391, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:44.477652', 'step': 18391, 'epoch': 3} {'type': 'loss', 'content': 0.07688925415277481, 'timestamp': '2025-10-01 04:42:44.483551', 'step': 18392, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:44.537589', 'step': 18392, 'epoch': 3} {'type': 'loss', 'content': 0.05580363795161247, 'timestamp': '2025-10-01 04:42:44.539948', 'step': 18393, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:44.598688', 'step': 18393, 'epoch': 3} {'type': 'loss', 'content': 0.12546761333942413, 'timestamp': '2025-10-01 04:42:44.600880', 'step': 18394, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:44.655112', 'step': 18394, 'epoch': 3} {'type': 'loss', 'content': 0.0844666063785553, 'timestamp': '2025-10-01 04:42:44.657290', 'step': 18395, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:44.710625', 'step': 18395, 'epoch': 3} {'type': 'loss', 'content': 0.24311505258083344, 'timestamp': '2025-10-01 04:42:44.717071', 'step': 18396, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:44.771901', 'step': 18396, 'epoch': 3} {'type': 'loss', 'content': 0.07294043898582458, 'timestamp': '2025-10-01 04:42:44.773983', 'step': 18397, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:44.827263', 'step': 18397, 'epoch': 3} {'type': 'loss', 'content': 0.09133583307266235, 'timestamp': '2025-10-01 04:42:44.829403', 'step': 18398, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:44.882894', 'step': 18398, 'epoch': 3} {'type': 'loss', 'content': 0.0960419625043869, 'timestamp': '2025-10-01 04:42:44.884974', 'step': 18399, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:44.939851', 'step': 18399, 'epoch': 3} {'type': 'loss', 'content': 0.08288628607988358, 'timestamp': '2025-10-01 04:42:44.951579', 'step': 18400, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:45.016174', 'step': 18400, 'epoch': 3} {'type': 'loss', 'content': 0.10107596218585968, 'timestamp': '2025-10-01 04:42:45.019403', 'step': 18401, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:45.074726', 'step': 18401, 'epoch': 3} {'type': 'loss', 'content': 0.17578740417957306, 'timestamp': '2025-10-01 04:42:45.077071', 'step': 18402, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:45.136215', 'step': 18402, 'epoch': 3} {'type': 'loss', 'content': 0.1338825225830078, 'timestamp': '2025-10-01 04:42:45.138422', 'step': 18403, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:45.192449', 'step': 18403, 'epoch': 3} {'type': 'loss', 'content': 0.08640044927597046, 'timestamp': '2025-10-01 04:42:45.198237', 'step': 18404, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:45.251883', 'step': 18404, 'epoch': 3} {'type': 'loss', 'content': 0.061701320111751556, 'timestamp': '2025-10-01 04:42:45.269644', 'step': 18405, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:45.323317', 'step': 18405, 'epoch': 3} {'type': 'loss', 'content': 0.10816080123186111, 'timestamp': '2025-10-01 04:42:45.325415', 'step': 18406, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:45.379092', 'step': 18406, 'epoch': 3} {'type': 'loss', 'content': 0.07741624861955643, 'timestamp': '2025-10-01 04:42:45.381356', 'step': 18407, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:45.434779', 'step': 18407, 'epoch': 3} {'type': 'loss', 'content': 0.06758137047290802, 'timestamp': '2025-10-01 04:42:45.441435', 'step': 18408, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:45.494616', 'step': 18408, 'epoch': 3} {'type': 'loss', 'content': 0.060192592442035675, 'timestamp': '2025-10-01 04:42:45.496813', 'step': 18409, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:45.552068', 'step': 18409, 'epoch': 3} {'type': 'loss', 'content': 0.07064671069383621, 'timestamp': '2025-10-01 04:42:45.554265', 'step': 18410, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:45.608285', 'step': 18410, 'epoch': 3} {'type': 'loss', 'content': 0.06300349533557892, 'timestamp': '2025-10-01 04:42:45.610488', 'step': 18411, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:45.664748', 'step': 18411, 'epoch': 3} {'type': 'loss', 'content': 0.09925254434347153, 'timestamp': '2025-10-01 04:42:45.670461', 'step': 18412, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:45.731115', 'step': 18412, 'epoch': 3} {'type': 'loss', 'content': 0.030303310602903366, 'timestamp': '2025-10-01 04:42:45.733275', 'step': 18413, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:45.787469', 'step': 18413, 'epoch': 3} {'type': 'loss', 'content': 0.10519836843013763, 'timestamp': '2025-10-01 04:42:45.789570', 'step': 18414, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:45.842538', 'step': 18414, 'epoch': 3} {'type': 'loss', 'content': 0.08074156194925308, 'timestamp': '2025-10-01 04:42:45.844610', 'step': 18415, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:45.900723', 'step': 18415, 'epoch': 3} {'type': 'loss', 'content': 0.18124951422214508, 'timestamp': '2025-10-01 04:42:45.906841', 'step': 18416, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:42:45.960013', 'step': 18416, 'epoch': 3} {'type': 'loss', 'content': 0.03323037177324295, 'timestamp': '2025-10-01 04:42:45.962191', 'step': 18417, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:46.015570', 'step': 18417, 'epoch': 3} {'type': 'loss', 'content': 0.11899352073669434, 'timestamp': '2025-10-01 04:42:46.017635', 'step': 18418, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:46.071596', 'step': 18418, 'epoch': 3} {'type': 'loss', 'content': 0.1162274107336998, 'timestamp': '2025-10-01 04:42:46.074128', 'step': 18419, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:46.127948', 'step': 18419, 'epoch': 3} {'type': 'loss', 'content': 0.08479402959346771, 'timestamp': '2025-10-01 04:42:46.134420', 'step': 18420, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:46.187143', 'step': 18420, 'epoch': 3} {'type': 'loss', 'content': 0.12615114450454712, 'timestamp': '2025-10-01 04:42:46.189923', 'step': 18421, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:46.244008', 'step': 18421, 'epoch': 3} {'type': 'loss', 'content': 0.11875669658184052, 'timestamp': '2025-10-01 04:42:46.257232', 'step': 18422, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:46.322126', 'step': 18422, 'epoch': 3} {'type': 'loss', 'content': 0.06220794469118118, 'timestamp': '2025-10-01 04:42:46.324582', 'step': 18423, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:46.377409', 'step': 18423, 'epoch': 3} {'type': 'loss', 'content': 0.11757148802280426, 'timestamp': '2025-10-01 04:42:46.383570', 'step': 18424, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:46.449434', 'step': 18424, 'epoch': 3} {'type': 'loss', 'content': 0.06969471275806427, 'timestamp': '2025-10-01 04:42:46.451570', 'step': 18425, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:46.504798', 'step': 18425, 'epoch': 3} {'type': 'loss', 'content': 0.058495666831731796, 'timestamp': '2025-10-01 04:42:46.507885', 'step': 18426, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:46.562361', 'step': 18426, 'epoch': 3} {'type': 'loss', 'content': 0.1348380446434021, 'timestamp': '2025-10-01 04:42:46.568524', 'step': 18427, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:46.623336', 'step': 18427, 'epoch': 3} {'type': 'loss', 'content': 0.07622195035219193, 'timestamp': '2025-10-01 04:42:46.629025', 'step': 18428, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:46.681832', 'step': 18428, 'epoch': 3} {'type': 'loss', 'content': 0.08431457728147507, 'timestamp': '2025-10-01 04:42:46.697878', 'step': 18429, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:46.751941', 'step': 18429, 'epoch': 3} {'type': 'loss', 'content': 0.12745881080627441, 'timestamp': '2025-10-01 04:42:46.754421', 'step': 18430, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:46.809721', 'step': 18430, 'epoch': 3} {'type': 'loss', 'content': 0.15318544209003448, 'timestamp': '2025-10-01 04:42:46.812060', 'step': 18431, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:46.866100', 'step': 18431, 'epoch': 3} {'type': 'loss', 'content': 0.12196940183639526, 'timestamp': '2025-10-01 04:42:46.872168', 'step': 18432, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:46.925909', 'step': 18432, 'epoch': 3} {'type': 'loss', 'content': 0.0707949697971344, 'timestamp': '2025-10-01 04:42:46.928049', 'step': 18433, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:46.981450', 'step': 18433, 'epoch': 3} {'type': 'loss', 'content': 0.11604004353284836, 'timestamp': '2025-10-01 04:42:46.983574', 'step': 18434, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:47.037806', 'step': 18434, 'epoch': 3} {'type': 'loss', 'content': 0.18994615972042084, 'timestamp': '2025-10-01 04:42:47.040030', 'step': 18435, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:47.093247', 'step': 18435, 'epoch': 3} {'type': 'loss', 'content': 0.10093384981155396, 'timestamp': '2025-10-01 04:42:47.099063', 'step': 18436, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:47.151786', 'step': 18436, 'epoch': 3} {'type': 'loss', 'content': 0.10191590338945389, 'timestamp': '2025-10-01 04:42:47.153964', 'step': 18437, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:42:47.207974', 'step': 18437, 'epoch': 3} {'type': 'loss', 'content': 0.13493268191814423, 'timestamp': '2025-10-01 04:42:47.213898', 'step': 18438, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:47.267903', 'step': 18438, 'epoch': 3} {'type': 'loss', 'content': 0.07542432099580765, 'timestamp': '2025-10-01 04:42:47.270051', 'step': 18439, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:47.323591', 'step': 18439, 'epoch': 3} {'type': 'loss', 'content': 0.15907201170921326, 'timestamp': '2025-10-01 04:42:47.329323', 'step': 18440, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:47.384162', 'step': 18440, 'epoch': 3} {'type': 'loss', 'content': 0.0803442895412445, 'timestamp': '2025-10-01 04:42:47.386380', 'step': 18441, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:47.440379', 'step': 18441, 'epoch': 3} {'type': 'loss', 'content': 0.0869431346654892, 'timestamp': '2025-10-01 04:42:47.443235', 'step': 18442, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:47.506898', 'step': 18442, 'epoch': 3} {'type': 'loss', 'content': 0.1445286124944687, 'timestamp': '2025-10-01 04:42:47.509701', 'step': 18443, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:47.565587', 'step': 18443, 'epoch': 3} {'type': 'loss', 'content': 0.06092345342040062, 'timestamp': '2025-10-01 04:42:47.571256', 'step': 18444, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:42:47.626117', 'step': 18444, 'epoch': 3} {'type': 'loss', 'content': 0.07167813181877136, 'timestamp': '2025-10-01 04:42:47.629294', 'step': 18445, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:47.702617', 'step': 18445, 'epoch': 3} {'type': 'loss', 'content': 0.07324102520942688, 'timestamp': '2025-10-01 04:42:47.706323', 'step': 18446, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:47.761841', 'step': 18446, 'epoch': 3} {'type': 'loss', 'content': 0.14536473155021667, 'timestamp': '2025-10-01 04:42:47.764518', 'step': 18447, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:42:47.818198', 'step': 18447, 'epoch': 3} {'type': 'loss', 'content': 0.06289181113243103, 'timestamp': '2025-10-01 04:42:47.824202', 'step': 18448, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:47.897993', 'step': 18448, 'epoch': 3} {'type': 'loss', 'content': 0.09095302224159241, 'timestamp': '2025-10-01 04:42:47.900351', 'step': 18449, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:47.966055', 'step': 18449, 'epoch': 3} {'type': 'loss', 'content': 0.08447961509227753, 'timestamp': '2025-10-01 04:42:47.968369', 'step': 18450, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:42:48.022884', 'step': 18450, 'epoch': 3} {'type': 'loss', 'content': 0.04997708648443222, 'timestamp': '2025-10-01 04:42:48.025197', 'step': 18451, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:48.079338', 'step': 18451, 'epoch': 3} {'type': 'loss', 'content': 0.06490525603294373, 'timestamp': '2025-10-01 04:42:48.086733', 'step': 18452, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:48.139979', 'step': 18452, 'epoch': 3} {'type': 'loss', 'content': 0.09261925518512726, 'timestamp': '2025-10-01 04:42:48.142128', 'step': 18453, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:48.196061', 'step': 18453, 'epoch': 3} {'type': 'loss', 'content': 0.1201895996928215, 'timestamp': '2025-10-01 04:42:48.199386', 'step': 18454, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:48.256173', 'step': 18454, 'epoch': 3} {'type': 'loss', 'content': 0.0680539533495903, 'timestamp': '2025-10-01 04:42:48.258331', 'step': 18455, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:48.311676', 'step': 18455, 'epoch': 3} {'type': 'loss', 'content': 0.09060383588075638, 'timestamp': '2025-10-01 04:42:48.319152', 'step': 18456, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:48.375890', 'step': 18456, 'epoch': 3} {'type': 'loss', 'content': 0.11419110745191574, 'timestamp': '2025-10-01 04:42:48.378375', 'step': 18457, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:48.432287', 'step': 18457, 'epoch': 3} {'type': 'loss', 'content': 0.028265420347452164, 'timestamp': '2025-10-01 04:42:48.434376', 'step': 18458, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:48.487674', 'step': 18458, 'epoch': 3} {'type': 'loss', 'content': 0.14999344944953918, 'timestamp': '2025-10-01 04:42:48.490152', 'step': 18459, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:48.546090', 'step': 18459, 'epoch': 3} {'type': 'loss', 'content': 0.1613626331090927, 'timestamp': '2025-10-01 04:42:48.553086', 'step': 18460, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:48.615386', 'step': 18460, 'epoch': 3} {'type': 'loss', 'content': 0.11041150242090225, 'timestamp': '2025-10-01 04:42:48.617650', 'step': 18461, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:48.671170', 'step': 18461, 'epoch': 3} {'type': 'loss', 'content': 0.12453385442495346, 'timestamp': '2025-10-01 04:42:48.673520', 'step': 18462, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:42:48.727796', 'step': 18462, 'epoch': 3} {'type': 'loss', 'content': 0.11356336623430252, 'timestamp': '2025-10-01 04:42:48.729899', 'step': 18463, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:48.796724', 'step': 18463, 'epoch': 3} {'type': 'loss', 'content': 0.1357330083847046, 'timestamp': '2025-10-01 04:42:48.803078', 'step': 18464, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:48.855832', 'step': 18464, 'epoch': 3} {'type': 'loss', 'content': 0.07619758695363998, 'timestamp': '2025-10-01 04:42:48.858092', 'step': 18465, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:48.920091', 'step': 18465, 'epoch': 3} {'type': 'loss', 'content': 0.1848801225423813, 'timestamp': '2025-10-01 04:42:48.924210', 'step': 18466, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:42:48.980473', 'step': 18466, 'epoch': 3} {'type': 'loss', 'content': 0.12170829623937607, 'timestamp': '2025-10-01 04:42:48.982629', 'step': 18467, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:49.056442', 'step': 18467, 'epoch': 3} {'type': 'loss', 'content': 0.04496997594833374, 'timestamp': '2025-10-01 04:42:49.062138', 'step': 18468, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:49.115194', 'step': 18468, 'epoch': 3} {'type': 'loss', 'content': 0.03290162608027458, 'timestamp': '2025-10-01 04:42:49.117531', 'step': 18469, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:49.170572', 'step': 18469, 'epoch': 3} {'type': 'loss', 'content': 0.14255402982234955, 'timestamp': '2025-10-01 04:42:49.172777', 'step': 18470, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:49.235698', 'step': 18470, 'epoch': 3} {'type': 'loss', 'content': 0.0940999761223793, 'timestamp': '2025-10-01 04:42:49.237888', 'step': 18471, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:49.292897', 'step': 18471, 'epoch': 3} {'type': 'loss', 'content': 0.05989568307995796, 'timestamp': '2025-10-01 04:42:49.301078', 'step': 18472, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:49.354218', 'step': 18472, 'epoch': 3} {'type': 'loss', 'content': 0.0489349365234375, 'timestamp': '2025-10-01 04:42:49.356359', 'step': 18473, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:49.409640', 'step': 18473, 'epoch': 3} {'type': 'loss', 'content': 0.07912196964025497, 'timestamp': '2025-10-01 04:42:49.415270', 'step': 18474, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:49.468633', 'step': 18474, 'epoch': 3} {'type': 'loss', 'content': 0.07667829096317291, 'timestamp': '2025-10-01 04:42:49.470816', 'step': 18475, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:49.524270', 'step': 18475, 'epoch': 3} {'type': 'loss', 'content': 0.06441447138786316, 'timestamp': '2025-10-01 04:42:49.530083', 'step': 18476, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:49.586076', 'step': 18476, 'epoch': 3} {'type': 'loss', 'content': 0.09084489196538925, 'timestamp': '2025-10-01 04:42:49.588265', 'step': 18477, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:49.641833', 'step': 18477, 'epoch': 3} {'type': 'loss', 'content': 0.0925355926156044, 'timestamp': '2025-10-01 04:42:49.644127', 'step': 18478, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:49.697781', 'step': 18478, 'epoch': 3} {'type': 'loss', 'content': 0.15408873558044434, 'timestamp': '2025-10-01 04:42:49.700106', 'step': 18479, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:49.753631', 'step': 18479, 'epoch': 3} {'type': 'loss', 'content': 0.016561836004257202, 'timestamp': '2025-10-01 04:42:49.759172', 'step': 18480, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:49.811704', 'step': 18480, 'epoch': 3} {'type': 'loss', 'content': 0.10925302654504776, 'timestamp': '2025-10-01 04:42:49.814218', 'step': 18481, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:49.869887', 'step': 18481, 'epoch': 3} {'type': 'loss', 'content': 0.13504527509212494, 'timestamp': '2025-10-01 04:42:49.872113', 'step': 18482, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:42:49.925807', 'step': 18482, 'epoch': 3} {'type': 'loss', 'content': 0.09879672527313232, 'timestamp': '2025-10-01 04:42:49.928291', 'step': 18483, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:49.982470', 'step': 18483, 'epoch': 3} {'type': 'loss', 'content': 0.07963337749242783, 'timestamp': '2025-10-01 04:42:49.988600', 'step': 18484, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:50.042164', 'step': 18484, 'epoch': 3} {'type': 'loss', 'content': 0.05617007985711098, 'timestamp': '2025-10-01 04:42:50.044750', 'step': 18485, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:50.098299', 'step': 18485, 'epoch': 3} {'type': 'loss', 'content': 0.13603897392749786, 'timestamp': '2025-10-01 04:42:50.102032', 'step': 18486, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:50.157907', 'step': 18486, 'epoch': 3} {'type': 'loss', 'content': 0.03990733250975609, 'timestamp': '2025-10-01 04:42:50.160333', 'step': 18487, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:50.214797', 'step': 18487, 'epoch': 3} {'type': 'loss', 'content': 0.07102391868829727, 'timestamp': '2025-10-01 04:42:50.220750', 'step': 18488, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:50.274487', 'step': 18488, 'epoch': 3} {'type': 'loss', 'content': 0.11656927317380905, 'timestamp': '2025-10-01 04:42:50.276679', 'step': 18489, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:50.330926', 'step': 18489, 'epoch': 3} {'type': 'loss', 'content': 0.13734275102615356, 'timestamp': '2025-10-01 04:42:50.333210', 'step': 18490, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:42:50.387058', 'step': 18490, 'epoch': 3} {'type': 'loss', 'content': 0.0933627113699913, 'timestamp': '2025-10-01 04:42:50.389423', 'step': 18491, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:50.444008', 'step': 18491, 'epoch': 3} {'type': 'loss', 'content': 0.1455398052930832, 'timestamp': '2025-10-01 04:42:50.450961', 'step': 18492, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:50.506747', 'step': 18492, 'epoch': 3} {'type': 'loss', 'content': 0.12839938700199127, 'timestamp': '2025-10-01 04:42:50.509100', 'step': 18493, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:50.563054', 'step': 18493, 'epoch': 3} {'type': 'loss', 'content': 0.1501527726650238, 'timestamp': '2025-10-01 04:42:50.565570', 'step': 18494, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:42:50.620491', 'step': 18494, 'epoch': 3} {'type': 'loss', 'content': 0.09883254766464233, 'timestamp': '2025-10-01 04:42:50.622972', 'step': 18495, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:50.677622', 'step': 18495, 'epoch': 3} {'type': 'loss', 'content': 0.11115120351314545, 'timestamp': '2025-10-01 04:42:50.683455', 'step': 18496, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:50.737890', 'step': 18496, 'epoch': 3} {'type': 'loss', 'content': 0.13593384623527527, 'timestamp': '2025-10-01 04:42:50.740526', 'step': 18497, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:50.794791', 'step': 18497, 'epoch': 3} {'type': 'loss', 'content': 0.13154467940330505, 'timestamp': '2025-10-01 04:42:50.797300', 'step': 18498, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:50.851900', 'step': 18498, 'epoch': 3} {'type': 'loss', 'content': 0.08831634372472763, 'timestamp': '2025-10-01 04:42:50.854433', 'step': 18499, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:50.908792', 'step': 18499, 'epoch': 3} {'type': 'loss', 'content': 0.06681101024150848, 'timestamp': '2025-10-01 04:42:50.914816', 'step': 18500, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 18500', 'timestamp': '2025-10-01 04:42:51.290200', 'step': 18500, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:51.345109', 'step': 18500, 'epoch': 3} {'type': 'loss', 'content': 0.09010928124189377, 'timestamp': '2025-10-01 04:42:51.347298', 'step': 18501, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:51.401241', 'step': 18501, 'epoch': 3} {'type': 'loss', 'content': 0.11078830808401108, 'timestamp': '2025-10-01 04:42:51.403470', 'step': 18502, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:51.456597', 'step': 18502, 'epoch': 3} {'type': 'loss', 'content': 0.03400513902306557, 'timestamp': '2025-10-01 04:42:51.458838', 'step': 18503, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:51.513686', 'step': 18503, 'epoch': 3} {'type': 'loss', 'content': 0.09760667383670807, 'timestamp': '2025-10-01 04:42:51.519479', 'step': 18504, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:51.574734', 'step': 18504, 'epoch': 3} {'type': 'loss', 'content': 0.14338012039661407, 'timestamp': '2025-10-01 04:42:51.576849', 'step': 18505, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:51.630347', 'step': 18505, 'epoch': 3} {'type': 'loss', 'content': 0.15592652559280396, 'timestamp': '2025-10-01 04:42:51.632929', 'step': 18506, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:51.692717', 'step': 18506, 'epoch': 3} {'type': 'loss', 'content': 0.024146541953086853, 'timestamp': '2025-10-01 04:42:51.694838', 'step': 18507, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:51.748829', 'step': 18507, 'epoch': 3} {'type': 'loss', 'content': 0.06324514001607895, 'timestamp': '2025-10-01 04:42:51.754661', 'step': 18508, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:51.811972', 'step': 18508, 'epoch': 3} {'type': 'loss', 'content': 0.10481348633766174, 'timestamp': '2025-10-01 04:42:51.814249', 'step': 18509, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:51.869126', 'step': 18509, 'epoch': 3} {'type': 'loss', 'content': 0.11069459468126297, 'timestamp': '2025-10-01 04:42:51.871204', 'step': 18510, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:51.924341', 'step': 18510, 'epoch': 3} {'type': 'loss', 'content': 0.05267724767327309, 'timestamp': '2025-10-01 04:42:51.926601', 'step': 18511, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:51.979759', 'step': 18511, 'epoch': 3} {'type': 'loss', 'content': 0.06872429698705673, 'timestamp': '2025-10-01 04:42:51.986152', 'step': 18512, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:52.039162', 'step': 18512, 'epoch': 3} {'type': 'loss', 'content': 0.05987017601728439, 'timestamp': '2025-10-01 04:42:52.041329', 'step': 18513, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:52.094687', 'step': 18513, 'epoch': 3} {'type': 'loss', 'content': 0.11920221149921417, 'timestamp': '2025-10-01 04:42:52.096868', 'step': 18514, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:52.150073', 'step': 18514, 'epoch': 3} {'type': 'loss', 'content': 0.06384027749300003, 'timestamp': '2025-10-01 04:42:52.152372', 'step': 18515, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:52.205759', 'step': 18515, 'epoch': 3} {'type': 'loss', 'content': 0.04277168959379196, 'timestamp': '2025-10-01 04:42:52.211477', 'step': 18516, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:52.269918', 'step': 18516, 'epoch': 3} {'type': 'loss', 'content': 0.1512109339237213, 'timestamp': '2025-10-01 04:42:52.272031', 'step': 18517, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:42:52.325556', 'step': 18517, 'epoch': 3} {'type': 'loss', 'content': 0.051021311432123184, 'timestamp': '2025-10-01 04:42:52.327663', 'step': 18518, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:52.381376', 'step': 18518, 'epoch': 3} {'type': 'loss', 'content': 0.08252476155757904, 'timestamp': '2025-10-01 04:42:52.383485', 'step': 18519, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:52.436904', 'step': 18519, 'epoch': 3} {'type': 'loss', 'content': 0.07972640544176102, 'timestamp': '2025-10-01 04:42:52.442736', 'step': 18520, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:52.495575', 'step': 18520, 'epoch': 3} {'type': 'loss', 'content': 0.1135038286447525, 'timestamp': '2025-10-01 04:42:52.497921', 'step': 18521, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:52.553049', 'step': 18521, 'epoch': 3} {'type': 'loss', 'content': 0.034453973174095154, 'timestamp': '2025-10-01 04:42:52.555352', 'step': 18522, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:52.609151', 'step': 18522, 'epoch': 3} {'type': 'loss', 'content': 0.03223513066768646, 'timestamp': '2025-10-01 04:42:52.611428', 'step': 18523, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:52.665916', 'step': 18523, 'epoch': 3} {'type': 'loss', 'content': 0.10305566340684891, 'timestamp': '2025-10-01 04:42:52.678809', 'step': 18524, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:52.737002', 'step': 18524, 'epoch': 3} {'type': 'loss', 'content': 0.05530406907200813, 'timestamp': '2025-10-01 04:42:52.739300', 'step': 18525, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:52.794166', 'step': 18525, 'epoch': 3} {'type': 'loss', 'content': 0.054804980754852295, 'timestamp': '2025-10-01 04:42:52.796523', 'step': 18526, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:52.849750', 'step': 18526, 'epoch': 3} {'type': 'loss', 'content': 0.20256458222866058, 'timestamp': '2025-10-01 04:42:52.851891', 'step': 18527, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:52.904927', 'step': 18527, 'epoch': 3} {'type': 'loss', 'content': 0.11299873143434525, 'timestamp': '2025-10-01 04:42:52.910679', 'step': 18528, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:52.963196', 'step': 18528, 'epoch': 3} {'type': 'loss', 'content': 0.1326729655265808, 'timestamp': '2025-10-01 04:42:52.965271', 'step': 18529, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:53.017949', 'step': 18529, 'epoch': 3} {'type': 'loss', 'content': 0.09336830675601959, 'timestamp': '2025-10-01 04:42:53.021521', 'step': 18530, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:53.075380', 'step': 18530, 'epoch': 3} {'type': 'loss', 'content': 0.20775564014911652, 'timestamp': '2025-10-01 04:42:53.078450', 'step': 18531, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:53.137416', 'step': 18531, 'epoch': 3} {'type': 'loss', 'content': 0.1504085510969162, 'timestamp': '2025-10-01 04:42:53.143395', 'step': 18532, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:42:53.198678', 'step': 18532, 'epoch': 3} {'type': 'loss', 'content': 0.08584129810333252, 'timestamp': '2025-10-01 04:42:53.204434', 'step': 18533, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:53.258821', 'step': 18533, 'epoch': 3} {'type': 'loss', 'content': 0.19308072328567505, 'timestamp': '2025-10-01 04:42:53.261041', 'step': 18534, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:53.314589', 'step': 18534, 'epoch': 3} {'type': 'loss', 'content': 0.04005756229162216, 'timestamp': '2025-10-01 04:42:53.317858', 'step': 18535, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:53.371973', 'step': 18535, 'epoch': 3} {'type': 'loss', 'content': 0.1453169584274292, 'timestamp': '2025-10-01 04:42:53.389307', 'step': 18536, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:42:53.443381', 'step': 18536, 'epoch': 3} {'type': 'loss', 'content': 0.06756281852722168, 'timestamp': '2025-10-01 04:42:53.445598', 'step': 18537, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:53.500199', 'step': 18537, 'epoch': 3} {'type': 'loss', 'content': 0.10774513334035873, 'timestamp': '2025-10-01 04:42:53.504074', 'step': 18538, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:53.559552', 'step': 18538, 'epoch': 3} {'type': 'loss', 'content': 0.05891117453575134, 'timestamp': '2025-10-01 04:42:53.561893', 'step': 18539, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:53.616521', 'step': 18539, 'epoch': 3} {'type': 'loss', 'content': 0.11834844946861267, 'timestamp': '2025-10-01 04:42:53.622216', 'step': 18540, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:53.676383', 'step': 18540, 'epoch': 3} {'type': 'loss', 'content': 0.11451661586761475, 'timestamp': '2025-10-01 04:42:53.679662', 'step': 18541, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:42:53.733033', 'step': 18541, 'epoch': 3} {'type': 'loss', 'content': 0.06760592758655548, 'timestamp': '2025-10-01 04:42:53.735521', 'step': 18542, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:53.788444', 'step': 18542, 'epoch': 3} {'type': 'loss', 'content': 0.10247749835252762, 'timestamp': '2025-10-01 04:42:53.790747', 'step': 18543, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:53.846271', 'step': 18543, 'epoch': 3} {'type': 'loss', 'content': 0.14356529712677002, 'timestamp': '2025-10-01 04:42:53.854794', 'step': 18544, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:53.937847', 'step': 18544, 'epoch': 3} {'type': 'loss', 'content': 0.07233300060033798, 'timestamp': '2025-10-01 04:42:53.939990', 'step': 18545, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:53.993063', 'step': 18545, 'epoch': 3} {'type': 'loss', 'content': 0.031231354922056198, 'timestamp': '2025-10-01 04:42:53.996018', 'step': 18546, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:54.049995', 'step': 18546, 'epoch': 3} {'type': 'loss', 'content': 0.051552820950746536, 'timestamp': '2025-10-01 04:42:54.052141', 'step': 18547, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:54.106169', 'step': 18547, 'epoch': 3} {'type': 'loss', 'content': 0.04978112876415253, 'timestamp': '2025-10-01 04:42:54.111939', 'step': 18548, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:54.165237', 'step': 18548, 'epoch': 3} {'type': 'loss', 'content': 0.14013907313346863, 'timestamp': '2025-10-01 04:42:54.167516', 'step': 18549, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:54.220707', 'step': 18549, 'epoch': 3} {'type': 'loss', 'content': 0.11295581609010696, 'timestamp': '2025-10-01 04:42:54.223345', 'step': 18550, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:54.277948', 'step': 18550, 'epoch': 3} {'type': 'loss', 'content': 0.12457982450723648, 'timestamp': '2025-10-01 04:42:54.280297', 'step': 18551, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:54.335525', 'step': 18551, 'epoch': 3} {'type': 'loss', 'content': 0.1371370404958725, 'timestamp': '2025-10-01 04:42:54.341578', 'step': 18552, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:54.394092', 'step': 18552, 'epoch': 3} {'type': 'loss', 'content': 0.12054836004972458, 'timestamp': '2025-10-01 04:42:54.396205', 'step': 18553, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:54.449338', 'step': 18553, 'epoch': 3} {'type': 'loss', 'content': 0.12596175074577332, 'timestamp': '2025-10-01 04:42:54.451698', 'step': 18554, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:42:54.505228', 'step': 18554, 'epoch': 3} {'type': 'loss', 'content': 0.05294132977724075, 'timestamp': '2025-10-01 04:42:54.507522', 'step': 18555, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:54.561683', 'step': 18555, 'epoch': 3} {'type': 'loss', 'content': 0.12931299209594727, 'timestamp': '2025-10-01 04:42:54.568170', 'step': 18556, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:42:54.621902', 'step': 18556, 'epoch': 3} {'type': 'loss', 'content': 0.09240134060382843, 'timestamp': '2025-10-01 04:42:54.624119', 'step': 18557, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:54.681592', 'step': 18557, 'epoch': 3} {'type': 'loss', 'content': 0.08001573383808136, 'timestamp': '2025-10-01 04:42:54.684141', 'step': 18558, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:54.739086', 'step': 18558, 'epoch': 3} {'type': 'loss', 'content': 0.10414359718561172, 'timestamp': '2025-10-01 04:42:54.741665', 'step': 18559, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:54.795407', 'step': 18559, 'epoch': 3} {'type': 'loss', 'content': 0.12083525210618973, 'timestamp': '2025-10-01 04:42:54.817894', 'step': 18560, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:54.871914', 'step': 18560, 'epoch': 3} {'type': 'loss', 'content': 0.11657927930355072, 'timestamp': '2025-10-01 04:42:54.874793', 'step': 18561, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:54.933083', 'step': 18561, 'epoch': 3} {'type': 'loss', 'content': 0.10204900056123734, 'timestamp': '2025-10-01 04:42:54.935448', 'step': 18562, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:54.989912', 'step': 18562, 'epoch': 3} {'type': 'loss', 'content': 0.07037924975156784, 'timestamp': '2025-10-01 04:42:54.992032', 'step': 18563, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:55.045669', 'step': 18563, 'epoch': 3} {'type': 'loss', 'content': 0.08974766731262207, 'timestamp': '2025-10-01 04:42:55.053099', 'step': 18564, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:55.105915', 'step': 18564, 'epoch': 3} {'type': 'loss', 'content': 0.054310496896505356, 'timestamp': '2025-10-01 04:42:55.108155', 'step': 18565, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:55.161061', 'step': 18565, 'epoch': 3} {'type': 'loss', 'content': 0.0977124571800232, 'timestamp': '2025-10-01 04:42:55.163268', 'step': 18566, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:55.216568', 'step': 18566, 'epoch': 3} {'type': 'loss', 'content': 0.0395013764500618, 'timestamp': '2025-10-01 04:42:55.218807', 'step': 18567, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:55.272886', 'step': 18567, 'epoch': 3} {'type': 'loss', 'content': 0.1412903070449829, 'timestamp': '2025-10-01 04:42:55.278954', 'step': 18568, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:55.331751', 'step': 18568, 'epoch': 3} {'type': 'loss', 'content': 0.09298915416002274, 'timestamp': '2025-10-01 04:42:55.333908', 'step': 18569, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:55.387373', 'step': 18569, 'epoch': 3} {'type': 'loss', 'content': 0.2041148543357849, 'timestamp': '2025-10-01 04:42:55.389480', 'step': 18570, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:55.443376', 'step': 18570, 'epoch': 3} {'type': 'loss', 'content': 0.1111159697175026, 'timestamp': '2025-10-01 04:42:55.445956', 'step': 18571, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-01 04:42:55.506463', 'step': 18571, 'epoch': 3} {'type': 'loss', 'content': 0.08109482377767563, 'timestamp': '2025-10-01 04:42:55.517680', 'step': 18572, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:55.571612', 'step': 18572, 'epoch': 3} {'type': 'loss', 'content': 0.051029495894908905, 'timestamp': '2025-10-01 04:42:55.573784', 'step': 18573, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:55.628301', 'step': 18573, 'epoch': 3} {'type': 'loss', 'content': 0.11001718044281006, 'timestamp': '2025-10-01 04:42:55.630485', 'step': 18574, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:55.686931', 'step': 18574, 'epoch': 3} {'type': 'loss', 'content': 0.04155156761407852, 'timestamp': '2025-10-01 04:42:55.689379', 'step': 18575, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:55.744704', 'step': 18575, 'epoch': 3} {'type': 'loss', 'content': 0.14306063950061798, 'timestamp': '2025-10-01 04:42:55.750854', 'step': 18576, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:55.804899', 'step': 18576, 'epoch': 3} {'type': 'loss', 'content': 0.12810194492340088, 'timestamp': '2025-10-01 04:42:55.807129', 'step': 18577, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:55.861793', 'step': 18577, 'epoch': 3} {'type': 'loss', 'content': 0.05594755336642265, 'timestamp': '2025-10-01 04:42:55.864056', 'step': 18578, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:55.917889', 'step': 18578, 'epoch': 3} {'type': 'loss', 'content': 0.03905356675386429, 'timestamp': '2025-10-01 04:42:55.920415', 'step': 18579, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:55.974950', 'step': 18579, 'epoch': 3} {'type': 'loss', 'content': 0.1962047815322876, 'timestamp': '2025-10-01 04:42:55.981554', 'step': 18580, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:56.036046', 'step': 18580, 'epoch': 3} {'type': 'loss', 'content': 0.10744450986385345, 'timestamp': '2025-10-01 04:42:56.038129', 'step': 18581, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:56.092900', 'step': 18581, 'epoch': 3} {'type': 'loss', 'content': 0.10780594497919083, 'timestamp': '2025-10-01 04:42:56.095101', 'step': 18582, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:56.152977', 'step': 18582, 'epoch': 3} {'type': 'loss', 'content': 0.1744549423456192, 'timestamp': '2025-10-01 04:42:56.155110', 'step': 18583, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:56.210889', 'step': 18583, 'epoch': 3} {'type': 'loss', 'content': 0.06675069779157639, 'timestamp': '2025-10-01 04:42:56.217992', 'step': 18584, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:42:56.273959', 'step': 18584, 'epoch': 3} {'type': 'loss', 'content': 0.07668770849704742, 'timestamp': '2025-10-01 04:42:56.276050', 'step': 18585, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:56.331850', 'step': 18585, 'epoch': 3} {'type': 'loss', 'content': 0.14973099529743195, 'timestamp': '2025-10-01 04:42:56.334171', 'step': 18586, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:56.392584', 'step': 18586, 'epoch': 3} {'type': 'loss', 'content': 0.11267008632421494, 'timestamp': '2025-10-01 04:42:56.394823', 'step': 18587, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:56.451178', 'step': 18587, 'epoch': 3} {'type': 'loss', 'content': 0.06805336475372314, 'timestamp': '2025-10-01 04:42:56.458248', 'step': 18588, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:56.513236', 'step': 18588, 'epoch': 3} {'type': 'loss', 'content': 0.05282340943813324, 'timestamp': '2025-10-01 04:42:56.515603', 'step': 18589, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:56.573238', 'step': 18589, 'epoch': 3} {'type': 'loss', 'content': 0.09977615624666214, 'timestamp': '2025-10-01 04:42:56.575386', 'step': 18590, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:56.633388', 'step': 18590, 'epoch': 3} {'type': 'loss', 'content': 0.07707594335079193, 'timestamp': '2025-10-01 04:42:56.635693', 'step': 18591, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:56.690396', 'step': 18591, 'epoch': 3} {'type': 'loss', 'content': 0.12071307748556137, 'timestamp': '2025-10-01 04:42:56.696636', 'step': 18592, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:56.750669', 'step': 18592, 'epoch': 3} {'type': 'loss', 'content': 0.05029553920030594, 'timestamp': '2025-10-01 04:42:56.752971', 'step': 18593, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:56.807446', 'step': 18593, 'epoch': 3} {'type': 'loss', 'content': 0.11891341209411621, 'timestamp': '2025-10-01 04:42:56.810025', 'step': 18594, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:56.864855', 'step': 18594, 'epoch': 3} {'type': 'loss', 'content': 0.15984801948070526, 'timestamp': '2025-10-01 04:42:56.867063', 'step': 18595, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:56.920450', 'step': 18595, 'epoch': 3} {'type': 'loss', 'content': 0.029029453173279762, 'timestamp': '2025-10-01 04:42:56.927036', 'step': 18596, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:56.982144', 'step': 18596, 'epoch': 3} {'type': 'loss', 'content': 0.0859571099281311, 'timestamp': '2025-10-01 04:42:56.984307', 'step': 18597, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:57.038653', 'step': 18597, 'epoch': 3} {'type': 'loss', 'content': 0.06911779940128326, 'timestamp': '2025-10-01 04:42:57.040827', 'step': 18598, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:57.097125', 'step': 18598, 'epoch': 3} {'type': 'loss', 'content': 0.10096129775047302, 'timestamp': '2025-10-01 04:42:57.099282', 'step': 18599, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:57.153335', 'step': 18599, 'epoch': 3} {'type': 'loss', 'content': 0.12536337971687317, 'timestamp': '2025-10-01 04:42:57.160930', 'step': 18600, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:42:57.213828', 'step': 18600, 'epoch': 3} {'type': 'loss', 'content': 0.07985830307006836, 'timestamp': '2025-10-01 04:42:57.215957', 'step': 18601, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:57.269878', 'step': 18601, 'epoch': 3} {'type': 'loss', 'content': 0.0654887706041336, 'timestamp': '2025-10-01 04:42:57.272037', 'step': 18602, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:57.325708', 'step': 18602, 'epoch': 3} {'type': 'loss', 'content': 0.11847081035375595, 'timestamp': '2025-10-01 04:42:57.327924', 'step': 18603, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:57.381731', 'step': 18603, 'epoch': 3} {'type': 'loss', 'content': 0.09512197226285934, 'timestamp': '2025-10-01 04:42:57.387603', 'step': 18604, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:57.441018', 'step': 18604, 'epoch': 3} {'type': 'loss', 'content': 0.07001646608114243, 'timestamp': '2025-10-01 04:42:57.443463', 'step': 18605, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:57.498481', 'step': 18605, 'epoch': 3} {'type': 'loss', 'content': 0.23822002112865448, 'timestamp': '2025-10-01 04:42:57.500858', 'step': 18606, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:57.554966', 'step': 18606, 'epoch': 3} {'type': 'loss', 'content': 0.06595712900161743, 'timestamp': '2025-10-01 04:42:57.557158', 'step': 18607, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:57.611237', 'step': 18607, 'epoch': 3} {'type': 'loss', 'content': 0.08325619995594025, 'timestamp': '2025-10-01 04:42:57.617608', 'step': 18608, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:57.671338', 'step': 18608, 'epoch': 3} {'type': 'loss', 'content': 0.13555017113685608, 'timestamp': '2025-10-01 04:42:57.673610', 'step': 18609, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:57.727495', 'step': 18609, 'epoch': 3} {'type': 'loss', 'content': 0.13302603363990784, 'timestamp': '2025-10-01 04:42:57.729596', 'step': 18610, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:57.783595', 'step': 18610, 'epoch': 3} {'type': 'loss', 'content': 0.13526098430156708, 'timestamp': '2025-10-01 04:42:57.785696', 'step': 18611, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:57.839000', 'step': 18611, 'epoch': 3} {'type': 'loss', 'content': 0.07452668249607086, 'timestamp': '2025-10-01 04:42:57.844945', 'step': 18612, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:57.898290', 'step': 18612, 'epoch': 3} {'type': 'loss', 'content': 0.0712355375289917, 'timestamp': '2025-10-01 04:42:57.900386', 'step': 18613, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:57.953694', 'step': 18613, 'epoch': 3} {'type': 'loss', 'content': 0.14973630011081696, 'timestamp': '2025-10-01 04:42:57.955881', 'step': 18614, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:58.009155', 'step': 18614, 'epoch': 3} {'type': 'loss', 'content': 0.18347421288490295, 'timestamp': '2025-10-01 04:42:58.012832', 'step': 18615, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:58.068828', 'step': 18615, 'epoch': 3} {'type': 'loss', 'content': 0.03492855653166771, 'timestamp': '2025-10-01 04:42:58.074655', 'step': 18616, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:42:58.128382', 'step': 18616, 'epoch': 3} {'type': 'loss', 'content': 0.021936699748039246, 'timestamp': '2025-10-01 04:42:58.130515', 'step': 18617, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:58.184133', 'step': 18617, 'epoch': 3} {'type': 'loss', 'content': 0.09266629070043564, 'timestamp': '2025-10-01 04:42:58.186215', 'step': 18618, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:58.240884', 'step': 18618, 'epoch': 3} {'type': 'loss', 'content': 0.0486256442964077, 'timestamp': '2025-10-01 04:42:58.243177', 'step': 18619, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:58.300452', 'step': 18619, 'epoch': 3} {'type': 'loss', 'content': 0.04844652861356735, 'timestamp': '2025-10-01 04:42:58.306314', 'step': 18620, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:58.360110', 'step': 18620, 'epoch': 3} {'type': 'loss', 'content': 0.10474197566509247, 'timestamp': '2025-10-01 04:42:58.362357', 'step': 18621, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:58.416932', 'step': 18621, 'epoch': 3} {'type': 'loss', 'content': 0.10051526874303818, 'timestamp': '2025-10-01 04:42:58.419209', 'step': 18622, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:58.473000', 'step': 18622, 'epoch': 3} {'type': 'loss', 'content': 0.09960103034973145, 'timestamp': '2025-10-01 04:42:58.480175', 'step': 18623, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:58.533662', 'step': 18623, 'epoch': 3} {'type': 'loss', 'content': 0.17243832349777222, 'timestamp': '2025-10-01 04:42:58.539521', 'step': 18624, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:58.592314', 'step': 18624, 'epoch': 3} {'type': 'loss', 'content': 0.11805657297372818, 'timestamp': '2025-10-01 04:42:58.594451', 'step': 18625, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:58.648008', 'step': 18625, 'epoch': 3} {'type': 'loss', 'content': 0.07730282843112946, 'timestamp': '2025-10-01 04:42:58.650110', 'step': 18626, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:58.703650', 'step': 18626, 'epoch': 3} {'type': 'loss', 'content': 0.15022900700569153, 'timestamp': '2025-10-01 04:42:58.705718', 'step': 18627, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:58.759113', 'step': 18627, 'epoch': 3} {'type': 'loss', 'content': 0.11768116801977158, 'timestamp': '2025-10-01 04:42:58.764840', 'step': 18628, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:58.817307', 'step': 18628, 'epoch': 3} {'type': 'loss', 'content': 0.10506367683410645, 'timestamp': '2025-10-01 04:42:58.819445', 'step': 18629, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:42:58.871938', 'step': 18629, 'epoch': 3} {'type': 'loss', 'content': 0.03867243602871895, 'timestamp': '2025-10-01 04:42:58.873817', 'step': 18630, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:58.927180', 'step': 18630, 'epoch': 3} {'type': 'loss', 'content': 0.13168318569660187, 'timestamp': '2025-10-01 04:42:58.929372', 'step': 18631, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:58.988749', 'step': 18631, 'epoch': 3} {'type': 'loss', 'content': 0.051753781735897064, 'timestamp': '2025-10-01 04:42:58.994437', 'step': 18632, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:59.050875', 'step': 18632, 'epoch': 3} {'type': 'loss', 'content': 0.1451220065355301, 'timestamp': '2025-10-01 04:42:59.053027', 'step': 18633, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:59.106188', 'step': 18633, 'epoch': 3} {'type': 'loss', 'content': 0.12184995412826538, 'timestamp': '2025-10-01 04:42:59.108337', 'step': 18634, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:42:59.161447', 'step': 18634, 'epoch': 3} {'type': 'loss', 'content': 0.15942363440990448, 'timestamp': '2025-10-01 04:42:59.163362', 'step': 18635, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:59.216951', 'step': 18635, 'epoch': 3} {'type': 'loss', 'content': 0.12996253371238708, 'timestamp': '2025-10-01 04:42:59.222547', 'step': 18636, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:59.276573', 'step': 18636, 'epoch': 3} {'type': 'loss', 'content': 0.047866784036159515, 'timestamp': '2025-10-01 04:42:59.278938', 'step': 18637, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:42:59.331960', 'step': 18637, 'epoch': 3} {'type': 'loss', 'content': 0.05048340559005737, 'timestamp': '2025-10-01 04:42:59.333870', 'step': 18638, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:42:59.387183', 'step': 18638, 'epoch': 3} {'type': 'loss', 'content': 0.12905533611774445, 'timestamp': '2025-10-01 04:42:59.390462', 'step': 18639, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:42:59.444670', 'step': 18639, 'epoch': 3} {'type': 'loss', 'content': 0.0809805765748024, 'timestamp': '2025-10-01 04:42:59.450484', 'step': 18640, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-01 04:43:12.514965', 'step': 18640, 'epoch': 3} {'type': 'pplx', 'content': 9101.293234417422, 'timestamp': '2025-10-01 04:43:12.518742', 'step': 18640, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:43:12.572879', 'step': 18640, 'epoch': 3} {'type': 'loss', 'content': 0.136447936296463, 'timestamp': '2025-10-01 04:43:12.575085', 'step': 18641, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:12.629156', 'step': 18641, 'epoch': 3} {'type': 'loss', 'content': 0.018123602494597435, 'timestamp': '2025-10-01 04:43:12.631217', 'step': 18642, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:12.691968', 'step': 18642, 'epoch': 3} {'type': 'loss', 'content': 0.13088864088058472, 'timestamp': '2025-10-01 04:43:12.694168', 'step': 18643, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:12.751276', 'step': 18643, 'epoch': 3} {'type': 'loss', 'content': 0.10107719153165817, 'timestamp': '2025-10-01 04:43:12.764963', 'step': 18644, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:12.818747', 'step': 18644, 'epoch': 3} {'type': 'loss', 'content': 0.13418777287006378, 'timestamp': '2025-10-01 04:43:12.821887', 'step': 18645, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:12.875542', 'step': 18645, 'epoch': 3} {'type': 'loss', 'content': 0.12433130294084549, 'timestamp': '2025-10-01 04:43:12.877921', 'step': 18646, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:12.931398', 'step': 18646, 'epoch': 3} {'type': 'loss', 'content': 0.10641665756702423, 'timestamp': '2025-10-01 04:43:12.933644', 'step': 18647, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:12.993116', 'step': 18647, 'epoch': 3} {'type': 'loss', 'content': 0.0917535349726677, 'timestamp': '2025-10-01 04:43:12.999188', 'step': 18648, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:13.053000', 'step': 18648, 'epoch': 3} {'type': 'loss', 'content': 0.052322663366794586, 'timestamp': '2025-10-01 04:43:13.055462', 'step': 18649, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:13.108586', 'step': 18649, 'epoch': 3} {'type': 'loss', 'content': 0.1125626340508461, 'timestamp': '2025-10-01 04:43:13.110768', 'step': 18650, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:13.164438', 'step': 18650, 'epoch': 3} {'type': 'loss', 'content': 0.08352098613977432, 'timestamp': '2025-10-01 04:43:13.166704', 'step': 18651, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:13.219694', 'step': 18651, 'epoch': 3} {'type': 'loss', 'content': 0.18723560869693756, 'timestamp': '2025-10-01 04:43:13.225640', 'step': 18652, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:13.279237', 'step': 18652, 'epoch': 3} {'type': 'loss', 'content': 0.1389261782169342, 'timestamp': '2025-10-01 04:43:13.281617', 'step': 18653, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:13.334646', 'step': 18653, 'epoch': 3} {'type': 'loss', 'content': 0.08433552086353302, 'timestamp': '2025-10-01 04:43:13.336852', 'step': 18654, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:43:13.403551', 'step': 18654, 'epoch': 3} {'type': 'loss', 'content': 0.08092847466468811, 'timestamp': '2025-10-01 04:43:13.405858', 'step': 18655, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:13.460694', 'step': 18655, 'epoch': 3} {'type': 'loss', 'content': 0.10371427237987518, 'timestamp': '2025-10-01 04:43:13.466741', 'step': 18656, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:13.520391', 'step': 18656, 'epoch': 3} {'type': 'loss', 'content': 0.0781356543302536, 'timestamp': '2025-10-01 04:43:13.523783', 'step': 18657, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:13.577628', 'step': 18657, 'epoch': 3} {'type': 'loss', 'content': 0.011586256325244904, 'timestamp': '2025-10-01 04:43:13.580217', 'step': 18658, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:13.633709', 'step': 18658, 'epoch': 3} {'type': 'loss', 'content': 0.11136089265346527, 'timestamp': '2025-10-01 04:43:13.636033', 'step': 18659, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:13.695572', 'step': 18659, 'epoch': 3} {'type': 'loss', 'content': 0.11431284248828888, 'timestamp': '2025-10-01 04:43:13.701815', 'step': 18660, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:13.755544', 'step': 18660, 'epoch': 3} {'type': 'loss', 'content': 0.10671498626470566, 'timestamp': '2025-10-01 04:43:13.757963', 'step': 18661, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:13.813182', 'step': 18661, 'epoch': 3} {'type': 'loss', 'content': 0.05305325239896774, 'timestamp': '2025-10-01 04:43:13.815641', 'step': 18662, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:13.870619', 'step': 18662, 'epoch': 3} {'type': 'loss', 'content': 0.12670587003231049, 'timestamp': '2025-10-01 04:43:13.872885', 'step': 18663, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:13.926794', 'step': 18663, 'epoch': 3} {'type': 'loss', 'content': 0.1372823566198349, 'timestamp': '2025-10-01 04:43:13.932733', 'step': 18664, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:13.985553', 'step': 18664, 'epoch': 3} {'type': 'loss', 'content': 0.10300878435373306, 'timestamp': '2025-10-01 04:43:13.987772', 'step': 18665, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:14.054232', 'step': 18665, 'epoch': 3} {'type': 'loss', 'content': 0.0615164190530777, 'timestamp': '2025-10-01 04:43:14.056437', 'step': 18666, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:14.110283', 'step': 18666, 'epoch': 3} {'type': 'loss', 'content': 0.10559681802988052, 'timestamp': '2025-10-01 04:43:14.113364', 'step': 18667, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:14.166542', 'step': 18667, 'epoch': 3} {'type': 'loss', 'content': 0.0877801850438118, 'timestamp': '2025-10-01 04:43:14.177845', 'step': 18668, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:14.235765', 'step': 18668, 'epoch': 3} {'type': 'loss', 'content': 0.13000886142253876, 'timestamp': '2025-10-01 04:43:14.238226', 'step': 18669, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:14.293615', 'step': 18669, 'epoch': 3} {'type': 'loss', 'content': 0.12057190388441086, 'timestamp': '2025-10-01 04:43:14.296304', 'step': 18670, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:14.351116', 'step': 18670, 'epoch': 3} {'type': 'loss', 'content': 0.10746531933546066, 'timestamp': '2025-10-01 04:43:14.353501', 'step': 18671, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:14.409548', 'step': 18671, 'epoch': 3} {'type': 'loss', 'content': 0.03261042758822441, 'timestamp': '2025-10-01 04:43:14.415584', 'step': 18672, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:14.470049', 'step': 18672, 'epoch': 3} {'type': 'loss', 'content': 0.07990533858537674, 'timestamp': '2025-10-01 04:43:14.472561', 'step': 18673, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:14.528690', 'step': 18673, 'epoch': 3} {'type': 'loss', 'content': 0.07551619410514832, 'timestamp': '2025-10-01 04:43:14.531454', 'step': 18674, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:14.586376', 'step': 18674, 'epoch': 3} {'type': 'loss', 'content': 0.05070709437131882, 'timestamp': '2025-10-01 04:43:14.589001', 'step': 18675, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:14.642555', 'step': 18675, 'epoch': 3} {'type': 'loss', 'content': 0.07020382583141327, 'timestamp': '2025-10-01 04:43:14.649145', 'step': 18676, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:14.703190', 'step': 18676, 'epoch': 3} {'type': 'loss', 'content': 0.07451901584863663, 'timestamp': '2025-10-01 04:43:14.705589', 'step': 18677, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:43:14.761660', 'step': 18677, 'epoch': 3} {'type': 'loss', 'content': 0.14268602430820465, 'timestamp': '2025-10-01 04:43:14.764331', 'step': 18678, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:14.825016', 'step': 18678, 'epoch': 3} {'type': 'loss', 'content': 0.04828435927629471, 'timestamp': '2025-10-01 04:43:14.827284', 'step': 18679, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:14.881368', 'step': 18679, 'epoch': 3} {'type': 'loss', 'content': 0.041070159524679184, 'timestamp': '2025-10-01 04:43:14.888942', 'step': 18680, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:14.943066', 'step': 18680, 'epoch': 3} {'type': 'loss', 'content': 0.0680529847741127, 'timestamp': '2025-10-01 04:43:14.945601', 'step': 18681, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:43:15.000360', 'step': 18681, 'epoch': 3} {'type': 'loss', 'content': 0.07810737937688828, 'timestamp': '2025-10-01 04:43:15.002776', 'step': 18682, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:15.056693', 'step': 18682, 'epoch': 3} {'type': 'loss', 'content': 0.07328742742538452, 'timestamp': '2025-10-01 04:43:15.059260', 'step': 18683, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:15.126772', 'step': 18683, 'epoch': 3} {'type': 'loss', 'content': 0.0665334016084671, 'timestamp': '2025-10-01 04:43:15.133032', 'step': 18684, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:43:15.187083', 'step': 18684, 'epoch': 3} {'type': 'loss', 'content': 0.02283567190170288, 'timestamp': '2025-10-01 04:43:15.189843', 'step': 18685, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:15.243368', 'step': 18685, 'epoch': 3} {'type': 'loss', 'content': 0.04110832139849663, 'timestamp': '2025-10-01 04:43:15.246027', 'step': 18686, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:15.300727', 'step': 18686, 'epoch': 3} {'type': 'loss', 'content': 0.06754761934280396, 'timestamp': '2025-10-01 04:43:15.303290', 'step': 18687, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:15.358056', 'step': 18687, 'epoch': 3} {'type': 'loss', 'content': 0.12318877875804901, 'timestamp': '2025-10-01 04:43:15.364289', 'step': 18688, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:15.418384', 'step': 18688, 'epoch': 3} {'type': 'loss', 'content': 0.09777622669935226, 'timestamp': '2025-10-01 04:43:15.422179', 'step': 18689, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:43:15.476394', 'step': 18689, 'epoch': 3} {'type': 'loss', 'content': 0.09160690009593964, 'timestamp': '2025-10-01 04:43:15.478834', 'step': 18690, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:15.537124', 'step': 18690, 'epoch': 3} {'type': 'loss', 'content': 0.05512266606092453, 'timestamp': '2025-10-01 04:43:15.539449', 'step': 18691, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:15.600912', 'step': 18691, 'epoch': 3} {'type': 'loss', 'content': 0.11116532236337662, 'timestamp': '2025-10-01 04:43:15.607923', 'step': 18692, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:15.661656', 'step': 18692, 'epoch': 3} {'type': 'loss', 'content': 0.05822862312197685, 'timestamp': '2025-10-01 04:43:15.663639', 'step': 18693, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:15.720678', 'step': 18693, 'epoch': 3} {'type': 'loss', 'content': 0.13791398704051971, 'timestamp': '2025-10-01 04:43:15.723226', 'step': 18694, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:15.786357', 'step': 18694, 'epoch': 3} {'type': 'loss', 'content': 0.025315463542938232, 'timestamp': '2025-10-01 04:43:15.789039', 'step': 18695, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:15.843164', 'step': 18695, 'epoch': 3} {'type': 'loss', 'content': 0.023898284882307053, 'timestamp': '2025-10-01 04:43:15.848925', 'step': 18696, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:15.902058', 'step': 18696, 'epoch': 3} {'type': 'loss', 'content': 0.11414837092161179, 'timestamp': '2025-10-01 04:43:15.904113', 'step': 18697, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:15.957195', 'step': 18697, 'epoch': 3} {'type': 'loss', 'content': 0.09143505990505219, 'timestamp': '2025-10-01 04:43:15.959391', 'step': 18698, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:16.012570', 'step': 18698, 'epoch': 3} {'type': 'loss', 'content': 0.0624205656349659, 'timestamp': '2025-10-01 04:43:16.014730', 'step': 18699, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:16.067363', 'step': 18699, 'epoch': 3} {'type': 'loss', 'content': 0.09998511523008347, 'timestamp': '2025-10-01 04:43:16.073119', 'step': 18700, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:16.126580', 'step': 18700, 'epoch': 3} {'type': 'loss', 'content': 0.08224053680896759, 'timestamp': '2025-10-01 04:43:16.128801', 'step': 18701, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:16.188304', 'step': 18701, 'epoch': 3} {'type': 'loss', 'content': 0.04071187227964401, 'timestamp': '2025-10-01 04:43:16.190551', 'step': 18702, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:16.244087', 'step': 18702, 'epoch': 3} {'type': 'loss', 'content': 0.07479298859834671, 'timestamp': '2025-10-01 04:43:16.246500', 'step': 18703, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:16.300013', 'step': 18703, 'epoch': 3} {'type': 'loss', 'content': 0.06970805674791336, 'timestamp': '2025-10-01 04:43:16.305742', 'step': 18704, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:16.360469', 'step': 18704, 'epoch': 3} {'type': 'loss', 'content': 0.1540168821811676, 'timestamp': '2025-10-01 04:43:16.362613', 'step': 18705, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:16.416808', 'step': 18705, 'epoch': 3} {'type': 'loss', 'content': 0.02422310970723629, 'timestamp': '2025-10-01 04:43:16.427645', 'step': 18706, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:16.481306', 'step': 18706, 'epoch': 3} {'type': 'loss', 'content': 0.09590919315814972, 'timestamp': '2025-10-01 04:43:16.483701', 'step': 18707, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:43:16.536852', 'step': 18707, 'epoch': 3} {'type': 'loss', 'content': 0.13780470192432404, 'timestamp': '2025-10-01 04:43:16.542656', 'step': 18708, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:43:16.595055', 'step': 18708, 'epoch': 3} {'type': 'loss', 'content': 0.09388650953769684, 'timestamp': '2025-10-01 04:43:16.597094', 'step': 18709, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:16.660506', 'step': 18709, 'epoch': 3} {'type': 'loss', 'content': 0.07422693818807602, 'timestamp': '2025-10-01 04:43:16.663162', 'step': 18710, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:16.717278', 'step': 18710, 'epoch': 3} {'type': 'loss', 'content': 0.12157005071640015, 'timestamp': '2025-10-01 04:43:16.719463', 'step': 18711, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:16.772798', 'step': 18711, 'epoch': 3} {'type': 'loss', 'content': 0.06348525732755661, 'timestamp': '2025-10-01 04:43:16.778744', 'step': 18712, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:16.839539', 'step': 18712, 'epoch': 3} {'type': 'loss', 'content': 0.11850367486476898, 'timestamp': '2025-10-01 04:43:16.841773', 'step': 18713, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:16.896847', 'step': 18713, 'epoch': 3} {'type': 'loss', 'content': 0.09438366442918777, 'timestamp': '2025-10-01 04:43:16.898933', 'step': 18714, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:16.952551', 'step': 18714, 'epoch': 3} {'type': 'loss', 'content': 0.11307287216186523, 'timestamp': '2025-10-01 04:43:16.955021', 'step': 18715, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:17.008213', 'step': 18715, 'epoch': 3} {'type': 'loss', 'content': 0.11379577219486237, 'timestamp': '2025-10-01 04:43:17.015204', 'step': 18716, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:17.071103', 'step': 18716, 'epoch': 3} {'type': 'loss', 'content': 0.01896652765572071, 'timestamp': '2025-10-01 04:43:17.073286', 'step': 18717, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:17.135368', 'step': 18717, 'epoch': 3} {'type': 'loss', 'content': 0.03827555850148201, 'timestamp': '2025-10-01 04:43:17.137586', 'step': 18718, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:17.191188', 'step': 18718, 'epoch': 3} {'type': 'loss', 'content': 0.14051750302314758, 'timestamp': '2025-10-01 04:43:17.193414', 'step': 18719, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:17.247451', 'step': 18719, 'epoch': 3} {'type': 'loss', 'content': 0.03785375505685806, 'timestamp': '2025-10-01 04:43:17.253077', 'step': 18720, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:17.305831', 'step': 18720, 'epoch': 3} {'type': 'loss', 'content': 0.038934387266635895, 'timestamp': '2025-10-01 04:43:17.308048', 'step': 18721, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:17.360921', 'step': 18721, 'epoch': 3} {'type': 'loss', 'content': 0.06099509075284004, 'timestamp': '2025-10-01 04:43:17.363309', 'step': 18722, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:17.417706', 'step': 18722, 'epoch': 3} {'type': 'loss', 'content': 0.07204284518957138, 'timestamp': '2025-10-01 04:43:17.419889', 'step': 18723, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:17.473582', 'step': 18723, 'epoch': 3} {'type': 'loss', 'content': 0.13212978839874268, 'timestamp': '2025-10-01 04:43:17.479410', 'step': 18724, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:17.532896', 'step': 18724, 'epoch': 3} {'type': 'loss', 'content': 0.06662878394126892, 'timestamp': '2025-10-01 04:43:17.535047', 'step': 18725, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:17.588585', 'step': 18725, 'epoch': 3} {'type': 'loss', 'content': 0.038428258150815964, 'timestamp': '2025-10-01 04:43:17.590782', 'step': 18726, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:17.644292', 'step': 18726, 'epoch': 3} {'type': 'loss', 'content': 0.10250412672758102, 'timestamp': '2025-10-01 04:43:17.646488', 'step': 18727, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:17.699439', 'step': 18727, 'epoch': 3} {'type': 'loss', 'content': 0.01852898858487606, 'timestamp': '2025-10-01 04:43:17.705243', 'step': 18728, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:17.758097', 'step': 18728, 'epoch': 3} {'type': 'loss', 'content': 0.1539091020822525, 'timestamp': '2025-10-01 04:43:17.760175', 'step': 18729, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:17.813698', 'step': 18729, 'epoch': 3} {'type': 'loss', 'content': 0.11452624946832657, 'timestamp': '2025-10-01 04:43:17.816186', 'step': 18730, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:17.870149', 'step': 18730, 'epoch': 3} {'type': 'loss', 'content': 0.07099941372871399, 'timestamp': '2025-10-01 04:43:17.873329', 'step': 18731, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:17.927204', 'step': 18731, 'epoch': 3} {'type': 'loss', 'content': 0.15247343480587006, 'timestamp': '2025-10-01 04:43:17.933370', 'step': 18732, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:17.986218', 'step': 18732, 'epoch': 3} {'type': 'loss', 'content': 0.10058089345693588, 'timestamp': '2025-10-01 04:43:17.988393', 'step': 18733, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:18.050491', 'step': 18733, 'epoch': 3} {'type': 'loss', 'content': 0.030298540368676186, 'timestamp': '2025-10-01 04:43:18.052609', 'step': 18734, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:18.105655', 'step': 18734, 'epoch': 3} {'type': 'loss', 'content': 0.06987728923559189, 'timestamp': '2025-10-01 04:43:18.107665', 'step': 18735, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:18.160565', 'step': 18735, 'epoch': 3} {'type': 'loss', 'content': 0.0866175964474678, 'timestamp': '2025-10-01 04:43:18.166334', 'step': 18736, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:18.218734', 'step': 18736, 'epoch': 3} {'type': 'loss', 'content': 0.07971520721912384, 'timestamp': '2025-10-01 04:43:18.220804', 'step': 18737, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:18.273818', 'step': 18737, 'epoch': 3} {'type': 'loss', 'content': 0.019734421744942665, 'timestamp': '2025-10-01 04:43:18.275930', 'step': 18738, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:18.328691', 'step': 18738, 'epoch': 3} {'type': 'loss', 'content': 0.13335120677947998, 'timestamp': '2025-10-01 04:43:18.330924', 'step': 18739, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:18.383667', 'step': 18739, 'epoch': 3} {'type': 'loss', 'content': 0.07535135746002197, 'timestamp': '2025-10-01 04:43:18.389265', 'step': 18740, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:18.441922', 'step': 18740, 'epoch': 3} {'type': 'loss', 'content': 0.04656917229294777, 'timestamp': '2025-10-01 04:43:18.444017', 'step': 18741, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:18.497332', 'step': 18741, 'epoch': 3} {'type': 'loss', 'content': 0.12191883474588394, 'timestamp': '2025-10-01 04:43:18.499295', 'step': 18742, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:18.552357', 'step': 18742, 'epoch': 3} {'type': 'loss', 'content': 0.03954662010073662, 'timestamp': '2025-10-01 04:43:18.555655', 'step': 18743, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:18.610398', 'step': 18743, 'epoch': 3} {'type': 'loss', 'content': 0.14125356078147888, 'timestamp': '2025-10-01 04:43:18.616133', 'step': 18744, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:18.668837', 'step': 18744, 'epoch': 3} {'type': 'loss', 'content': 0.06400970369577408, 'timestamp': '2025-10-01 04:43:18.671142', 'step': 18745, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:43:18.724777', 'step': 18745, 'epoch': 3} {'type': 'loss', 'content': 0.12989261746406555, 'timestamp': '2025-10-01 04:43:18.726987', 'step': 18746, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:18.781538', 'step': 18746, 'epoch': 3} {'type': 'loss', 'content': 0.04469640925526619, 'timestamp': '2025-10-01 04:43:18.791289', 'step': 18747, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:18.848780', 'step': 18747, 'epoch': 3} {'type': 'loss', 'content': 0.13419857621192932, 'timestamp': '2025-10-01 04:43:18.854506', 'step': 18748, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:18.908733', 'step': 18748, 'epoch': 3} {'type': 'loss', 'content': 0.07483114302158356, 'timestamp': '2025-10-01 04:43:18.910890', 'step': 18749, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:18.963553', 'step': 18749, 'epoch': 3} {'type': 'loss', 'content': 0.051820483058691025, 'timestamp': '2025-10-01 04:43:18.970497', 'step': 18750, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:19.023471', 'step': 18750, 'epoch': 3} {'type': 'loss', 'content': 0.04881777614355087, 'timestamp': '2025-10-01 04:43:19.025522', 'step': 18751, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:19.079166', 'step': 18751, 'epoch': 3} {'type': 'loss', 'content': 0.12105491012334824, 'timestamp': '2025-10-01 04:43:19.088556', 'step': 18752, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:19.140830', 'step': 18752, 'epoch': 3} {'type': 'loss', 'content': 0.1378522515296936, 'timestamp': '2025-10-01 04:43:19.142979', 'step': 18753, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:19.195840', 'step': 18753, 'epoch': 3} {'type': 'loss', 'content': 0.10534989088773727, 'timestamp': '2025-10-01 04:43:19.197912', 'step': 18754, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:19.251545', 'step': 18754, 'epoch': 3} {'type': 'loss', 'content': 0.11524450778961182, 'timestamp': '2025-10-01 04:43:19.253814', 'step': 18755, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:19.306967', 'step': 18755, 'epoch': 3} {'type': 'loss', 'content': 0.11698468029499054, 'timestamp': '2025-10-01 04:43:19.313981', 'step': 18756, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:19.366877', 'step': 18756, 'epoch': 3} {'type': 'loss', 'content': 0.09035272151231766, 'timestamp': '2025-10-01 04:43:19.369009', 'step': 18757, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:19.422248', 'step': 18757, 'epoch': 3} {'type': 'loss', 'content': 0.05778946354985237, 'timestamp': '2025-10-01 04:43:19.425343', 'step': 18758, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:19.479367', 'step': 18758, 'epoch': 3} {'type': 'loss', 'content': 0.05784405767917633, 'timestamp': '2025-10-01 04:43:19.481725', 'step': 18759, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:19.535034', 'step': 18759, 'epoch': 3} {'type': 'loss', 'content': 0.10817505419254303, 'timestamp': '2025-10-01 04:43:19.541164', 'step': 18760, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:19.608286', 'step': 18760, 'epoch': 3} {'type': 'loss', 'content': 0.14875563979148865, 'timestamp': '2025-10-01 04:43:19.611606', 'step': 18761, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:19.666371', 'step': 18761, 'epoch': 3} {'type': 'loss', 'content': 0.058854784816503525, 'timestamp': '2025-10-01 04:43:19.668563', 'step': 18762, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:19.721564', 'step': 18762, 'epoch': 3} {'type': 'loss', 'content': 0.0757700502872467, 'timestamp': '2025-10-01 04:43:19.723797', 'step': 18763, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:19.776535', 'step': 18763, 'epoch': 3} {'type': 'loss', 'content': 0.11256284266710281, 'timestamp': '2025-10-01 04:43:19.782251', 'step': 18764, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:19.834962', 'step': 18764, 'epoch': 3} {'type': 'loss', 'content': 0.16756238043308258, 'timestamp': '2025-10-01 04:43:19.837152', 'step': 18765, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:19.889693', 'step': 18765, 'epoch': 3} {'type': 'loss', 'content': 0.07597270607948303, 'timestamp': '2025-10-01 04:43:19.891768', 'step': 18766, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:19.944548', 'step': 18766, 'epoch': 3} {'type': 'loss', 'content': 0.1685861051082611, 'timestamp': '2025-10-01 04:43:19.947890', 'step': 18767, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:20.001233', 'step': 18767, 'epoch': 3} {'type': 'loss', 'content': 0.12099138647317886, 'timestamp': '2025-10-01 04:43:20.007122', 'step': 18768, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:20.065715', 'step': 18768, 'epoch': 3} {'type': 'loss', 'content': 0.11579178273677826, 'timestamp': '2025-10-01 04:43:20.075502', 'step': 18769, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:20.128901', 'step': 18769, 'epoch': 3} {'type': 'loss', 'content': 0.07058035582304001, 'timestamp': '2025-10-01 04:43:20.131023', 'step': 18770, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:20.185218', 'step': 18770, 'epoch': 3} {'type': 'loss', 'content': 0.07801296561956406, 'timestamp': '2025-10-01 04:43:20.187912', 'step': 18771, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:20.248827', 'step': 18771, 'epoch': 3} {'type': 'loss', 'content': 0.06071316823363304, 'timestamp': '2025-10-01 04:43:20.255114', 'step': 18772, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:20.307733', 'step': 18772, 'epoch': 3} {'type': 'loss', 'content': 0.12474726140499115, 'timestamp': '2025-10-01 04:43:20.311782', 'step': 18773, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:20.365200', 'step': 18773, 'epoch': 3} {'type': 'loss', 'content': 0.09837987273931503, 'timestamp': '2025-10-01 04:43:20.367479', 'step': 18774, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:20.421215', 'step': 18774, 'epoch': 3} {'type': 'loss', 'content': 0.10175762325525284, 'timestamp': '2025-10-01 04:43:20.423717', 'step': 18775, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:20.478932', 'step': 18775, 'epoch': 3} {'type': 'loss', 'content': 0.09747879952192307, 'timestamp': '2025-10-01 04:43:20.485034', 'step': 18776, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:20.537988', 'step': 18776, 'epoch': 3} {'type': 'loss', 'content': 0.11879389733076096, 'timestamp': '2025-10-01 04:43:20.551976', 'step': 18777, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:20.606009', 'step': 18777, 'epoch': 3} {'type': 'loss', 'content': 0.08947695046663284, 'timestamp': '2025-10-01 04:43:20.608168', 'step': 18778, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:20.665183', 'step': 18778, 'epoch': 3} {'type': 'loss', 'content': 0.053813375532627106, 'timestamp': '2025-10-01 04:43:20.667174', 'step': 18779, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:20.720481', 'step': 18779, 'epoch': 3} {'type': 'loss', 'content': 0.06162439286708832, 'timestamp': '2025-10-01 04:43:20.726540', 'step': 18780, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:20.779262', 'step': 18780, 'epoch': 3} {'type': 'loss', 'content': 0.1760786473751068, 'timestamp': '2025-10-01 04:43:20.781502', 'step': 18781, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:20.834519', 'step': 18781, 'epoch': 3} {'type': 'loss', 'content': 0.08667750656604767, 'timestamp': '2025-10-01 04:43:20.836780', 'step': 18782, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:20.890256', 'step': 18782, 'epoch': 3} {'type': 'loss', 'content': 0.12136607617139816, 'timestamp': '2025-10-01 04:43:20.892540', 'step': 18783, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:43:20.945294', 'step': 18783, 'epoch': 3} {'type': 'loss', 'content': 0.02862873487174511, 'timestamp': '2025-10-01 04:43:20.951342', 'step': 18784, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:21.004245', 'step': 18784, 'epoch': 3} {'type': 'loss', 'content': 0.16784173250198364, 'timestamp': '2025-10-01 04:43:21.006412', 'step': 18785, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:21.059041', 'step': 18785, 'epoch': 3} {'type': 'loss', 'content': 0.16633526980876923, 'timestamp': '2025-10-01 04:43:21.061145', 'step': 18786, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:21.115849', 'step': 18786, 'epoch': 3} {'type': 'loss', 'content': 0.05371197313070297, 'timestamp': '2025-10-01 04:43:21.117992', 'step': 18787, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:21.173772', 'step': 18787, 'epoch': 3} {'type': 'loss', 'content': 0.04389116168022156, 'timestamp': '2025-10-01 04:43:21.180162', 'step': 18788, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:21.233170', 'step': 18788, 'epoch': 3} {'type': 'loss', 'content': 0.04837658256292343, 'timestamp': '2025-10-01 04:43:21.235384', 'step': 18789, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:21.288554', 'step': 18789, 'epoch': 3} {'type': 'loss', 'content': 0.09867127239704132, 'timestamp': '2025-10-01 04:43:21.290781', 'step': 18790, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:21.344195', 'step': 18790, 'epoch': 3} {'type': 'loss', 'content': 0.01788683421909809, 'timestamp': '2025-10-01 04:43:21.348882', 'step': 18791, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:43:21.404473', 'step': 18791, 'epoch': 3} {'type': 'loss', 'content': 0.21976134181022644, 'timestamp': '2025-10-01 04:43:21.410766', 'step': 18792, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:21.463746', 'step': 18792, 'epoch': 3} {'type': 'loss', 'content': 0.11273731291294098, 'timestamp': '2025-10-01 04:43:21.465900', 'step': 18793, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:21.518996', 'step': 18793, 'epoch': 3} {'type': 'loss', 'content': 0.10134705156087875, 'timestamp': '2025-10-01 04:43:21.522093', 'step': 18794, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:21.575292', 'step': 18794, 'epoch': 3} {'type': 'loss', 'content': 0.028872575610876083, 'timestamp': '2025-10-01 04:43:21.577437', 'step': 18795, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:21.634562', 'step': 18795, 'epoch': 3} {'type': 'loss', 'content': 0.030736582353711128, 'timestamp': '2025-10-01 04:43:21.640804', 'step': 18796, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:21.694536', 'step': 18796, 'epoch': 3} {'type': 'loss', 'content': 0.03296223282814026, 'timestamp': '2025-10-01 04:43:21.696682', 'step': 18797, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:21.749897', 'step': 18797, 'epoch': 3} {'type': 'loss', 'content': 0.04799067601561546, 'timestamp': '2025-10-01 04:43:21.753017', 'step': 18798, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:21.807238', 'step': 18798, 'epoch': 3} {'type': 'loss', 'content': 0.13716904819011688, 'timestamp': '2025-10-01 04:43:21.810163', 'step': 18799, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:21.863906', 'step': 18799, 'epoch': 3} {'type': 'loss', 'content': 0.0718890056014061, 'timestamp': '2025-10-01 04:43:21.879966', 'step': 18800, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:21.938148', 'step': 18800, 'epoch': 3} {'type': 'loss', 'content': 0.12157499045133591, 'timestamp': '2025-10-01 04:43:21.940455', 'step': 18801, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:21.995101', 'step': 18801, 'epoch': 3} {'type': 'loss', 'content': 0.09680197387933731, 'timestamp': '2025-10-01 04:43:21.997580', 'step': 18802, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:22.053016', 'step': 18802, 'epoch': 3} {'type': 'loss', 'content': 0.04783894866704941, 'timestamp': '2025-10-01 04:43:22.061425', 'step': 18803, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:22.120339', 'step': 18803, 'epoch': 3} {'type': 'loss', 'content': 0.12895271182060242, 'timestamp': '2025-10-01 04:43:22.126146', 'step': 18804, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:22.182285', 'step': 18804, 'epoch': 3} {'type': 'loss', 'content': 0.12977030873298645, 'timestamp': '2025-10-01 04:43:22.184792', 'step': 18805, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:22.238371', 'step': 18805, 'epoch': 3} {'type': 'loss', 'content': 0.07491621375083923, 'timestamp': '2025-10-01 04:43:22.240616', 'step': 18806, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:22.294018', 'step': 18806, 'epoch': 3} {'type': 'loss', 'content': 0.06002970412373543, 'timestamp': '2025-10-01 04:43:22.296285', 'step': 18807, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:22.349450', 'step': 18807, 'epoch': 3} {'type': 'loss', 'content': 0.1075977087020874, 'timestamp': '2025-10-01 04:43:22.355623', 'step': 18808, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:22.408461', 'step': 18808, 'epoch': 3} {'type': 'loss', 'content': 0.10736478865146637, 'timestamp': '2025-10-01 04:43:22.418172', 'step': 18809, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:22.471203', 'step': 18809, 'epoch': 3} {'type': 'loss', 'content': 0.1062108650803566, 'timestamp': '2025-10-01 04:43:22.473328', 'step': 18810, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:22.526409', 'step': 18810, 'epoch': 3} {'type': 'loss', 'content': 0.09582894295454025, 'timestamp': '2025-10-01 04:43:22.528487', 'step': 18811, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:43:22.582762', 'step': 18811, 'epoch': 3} {'type': 'loss', 'content': 0.14621475338935852, 'timestamp': '2025-10-01 04:43:22.589052', 'step': 18812, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:22.641712', 'step': 18812, 'epoch': 3} {'type': 'loss', 'content': 0.04785944148898125, 'timestamp': '2025-10-01 04:43:22.643942', 'step': 18813, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:22.697133', 'step': 18813, 'epoch': 3} {'type': 'loss', 'content': 0.08939457684755325, 'timestamp': '2025-10-01 04:43:22.699274', 'step': 18814, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:22.763492', 'step': 18814, 'epoch': 3} {'type': 'loss', 'content': 0.13154083490371704, 'timestamp': '2025-10-01 04:43:22.765779', 'step': 18815, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:22.819477', 'step': 18815, 'epoch': 3} {'type': 'loss', 'content': 0.05667981877923012, 'timestamp': '2025-10-01 04:43:22.826438', 'step': 18816, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:22.889735', 'step': 18816, 'epoch': 3} {'type': 'loss', 'content': 0.12008018791675568, 'timestamp': '2025-10-01 04:43:22.893534', 'step': 18817, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:22.947381', 'step': 18817, 'epoch': 3} {'type': 'loss', 'content': 0.07808972895145416, 'timestamp': '2025-10-01 04:43:22.949663', 'step': 18818, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:23.002903', 'step': 18818, 'epoch': 3} {'type': 'loss', 'content': 0.08097478747367859, 'timestamp': '2025-10-01 04:43:23.005024', 'step': 18819, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:43:23.058018', 'step': 18819, 'epoch': 3} {'type': 'loss', 'content': 0.13350917398929596, 'timestamp': '2025-10-01 04:43:23.063952', 'step': 18820, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:23.118213', 'step': 18820, 'epoch': 3} {'type': 'loss', 'content': 0.1462550163269043, 'timestamp': '2025-10-01 04:43:23.120376', 'step': 18821, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:23.173230', 'step': 18821, 'epoch': 3} {'type': 'loss', 'content': 0.03434479981660843, 'timestamp': '2025-10-01 04:43:23.175343', 'step': 18822, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:23.231591', 'step': 18822, 'epoch': 3} {'type': 'loss', 'content': 0.08033541589975357, 'timestamp': '2025-10-01 04:43:23.238515', 'step': 18823, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:23.291846', 'step': 18823, 'epoch': 3} {'type': 'loss', 'content': 0.0554000660777092, 'timestamp': '2025-10-01 04:43:23.297901', 'step': 18824, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:23.351676', 'step': 18824, 'epoch': 3} {'type': 'loss', 'content': 0.06604330986738205, 'timestamp': '2025-10-01 04:43:23.354280', 'step': 18825, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:23.408538', 'step': 18825, 'epoch': 3} {'type': 'loss', 'content': 0.0910535529255867, 'timestamp': '2025-10-01 04:43:23.411436', 'step': 18826, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:23.476813', 'step': 18826, 'epoch': 3} {'type': 'loss', 'content': 0.0866202637553215, 'timestamp': '2025-10-01 04:43:23.479278', 'step': 18827, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:23.533736', 'step': 18827, 'epoch': 3} {'type': 'loss', 'content': 0.024842100217938423, 'timestamp': '2025-10-01 04:43:23.539906', 'step': 18828, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:23.593875', 'step': 18828, 'epoch': 3} {'type': 'loss', 'content': 0.13091987371444702, 'timestamp': '2025-10-01 04:43:23.596572', 'step': 18829, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:23.651022', 'step': 18829, 'epoch': 3} {'type': 'loss', 'content': 0.09028997272253036, 'timestamp': '2025-10-01 04:43:23.653422', 'step': 18830, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:23.707718', 'step': 18830, 'epoch': 3} {'type': 'loss', 'content': 0.09032818675041199, 'timestamp': '2025-10-01 04:43:23.710180', 'step': 18831, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:23.764974', 'step': 18831, 'epoch': 3} {'type': 'loss', 'content': 0.08620540052652359, 'timestamp': '2025-10-01 04:43:23.771310', 'step': 18832, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:23.824960', 'step': 18832, 'epoch': 3} {'type': 'loss', 'content': 0.0879589095711708, 'timestamp': '2025-10-01 04:43:23.828342', 'step': 18833, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:23.882519', 'step': 18833, 'epoch': 3} {'type': 'loss', 'content': 0.06885234266519547, 'timestamp': '2025-10-01 04:43:23.885180', 'step': 18834, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:23.940635', 'step': 18834, 'epoch': 3} {'type': 'loss', 'content': 0.10834997892379761, 'timestamp': '2025-10-01 04:43:23.944105', 'step': 18835, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:23.999337', 'step': 18835, 'epoch': 3} {'type': 'loss', 'content': 0.03756572678685188, 'timestamp': '2025-10-01 04:43:24.005552', 'step': 18836, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:24.061895', 'step': 18836, 'epoch': 3} {'type': 'loss', 'content': 0.07073811441659927, 'timestamp': '2025-10-01 04:43:24.064878', 'step': 18837, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:24.119386', 'step': 18837, 'epoch': 3} {'type': 'loss', 'content': 0.1722182035446167, 'timestamp': '2025-10-01 04:43:24.122001', 'step': 18838, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:24.176115', 'step': 18838, 'epoch': 3} {'type': 'loss', 'content': 0.15895843505859375, 'timestamp': '2025-10-01 04:43:24.178702', 'step': 18839, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:24.233665', 'step': 18839, 'epoch': 3} {'type': 'loss', 'content': 0.09663966298103333, 'timestamp': '2025-10-01 04:43:24.248803', 'step': 18840, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:24.302316', 'step': 18840, 'epoch': 3} {'type': 'loss', 'content': 0.07246804982423782, 'timestamp': '2025-10-01 04:43:24.305402', 'step': 18841, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:24.359546', 'step': 18841, 'epoch': 3} {'type': 'loss', 'content': 0.14483529329299927, 'timestamp': '2025-10-01 04:43:24.362101', 'step': 18842, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:43:24.416937', 'step': 18842, 'epoch': 3} {'type': 'loss', 'content': 0.09924650937318802, 'timestamp': '2025-10-01 04:43:24.419404', 'step': 18843, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:24.479564', 'step': 18843, 'epoch': 3} {'type': 'loss', 'content': 0.12119074910879135, 'timestamp': '2025-10-01 04:43:24.485840', 'step': 18844, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:24.539991', 'step': 18844, 'epoch': 3} {'type': 'loss', 'content': 0.04807209596037865, 'timestamp': '2025-10-01 04:43:24.542778', 'step': 18845, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:24.602810', 'step': 18845, 'epoch': 3} {'type': 'loss', 'content': 0.13379260897636414, 'timestamp': '2025-10-01 04:43:24.605766', 'step': 18846, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:24.660140', 'step': 18846, 'epoch': 3} {'type': 'loss', 'content': 0.047970253974199295, 'timestamp': '2025-10-01 04:43:24.662382', 'step': 18847, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:24.716860', 'step': 18847, 'epoch': 3} {'type': 'loss', 'content': 0.07514271885156631, 'timestamp': '2025-10-01 04:43:24.722735', 'step': 18848, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:24.776651', 'step': 18848, 'epoch': 3} {'type': 'loss', 'content': 0.07111012190580368, 'timestamp': '2025-10-01 04:43:24.778869', 'step': 18849, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:24.833317', 'step': 18849, 'epoch': 3} {'type': 'loss', 'content': 0.0498635433614254, 'timestamp': '2025-10-01 04:43:24.835625', 'step': 18850, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:24.890246', 'step': 18850, 'epoch': 3} {'type': 'loss', 'content': 0.14488676190376282, 'timestamp': '2025-10-01 04:43:24.892661', 'step': 18851, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:24.949646', 'step': 18851, 'epoch': 3} {'type': 'loss', 'content': 0.07845856994390488, 'timestamp': '2025-10-01 04:43:24.955978', 'step': 18852, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:25.010744', 'step': 18852, 'epoch': 3} {'type': 'loss', 'content': 0.09135162085294724, 'timestamp': '2025-10-01 04:43:25.012864', 'step': 18853, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:25.066935', 'step': 18853, 'epoch': 3} {'type': 'loss', 'content': 0.0840856060385704, 'timestamp': '2025-10-01 04:43:25.069316', 'step': 18854, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:25.124562', 'step': 18854, 'epoch': 3} {'type': 'loss', 'content': 0.09489833563566208, 'timestamp': '2025-10-01 04:43:25.127046', 'step': 18855, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:25.183870', 'step': 18855, 'epoch': 3} {'type': 'loss', 'content': 0.03799626976251602, 'timestamp': '2025-10-01 04:43:25.190162', 'step': 18856, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:25.246116', 'step': 18856, 'epoch': 3} {'type': 'loss', 'content': 0.11422689259052277, 'timestamp': '2025-10-01 04:43:25.248417', 'step': 18857, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:25.303402', 'step': 18857, 'epoch': 3} {'type': 'loss', 'content': 0.09432724863290787, 'timestamp': '2025-10-01 04:43:25.305453', 'step': 18858, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:25.359903', 'step': 18858, 'epoch': 3} {'type': 'loss', 'content': 0.08101167529821396, 'timestamp': '2025-10-01 04:43:25.362134', 'step': 18859, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:43:25.415987', 'step': 18859, 'epoch': 3} {'type': 'loss', 'content': 0.07733699679374695, 'timestamp': '2025-10-01 04:43:25.421993', 'step': 18860, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:25.476383', 'step': 18860, 'epoch': 3} {'type': 'loss', 'content': 0.14895880222320557, 'timestamp': '2025-10-01 04:43:25.478625', 'step': 18861, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:25.535992', 'step': 18861, 'epoch': 3} {'type': 'loss', 'content': 0.1397109478712082, 'timestamp': '2025-10-01 04:43:25.538058', 'step': 18862, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:25.599020', 'step': 18862, 'epoch': 3} {'type': 'loss', 'content': 0.10066808015108109, 'timestamp': '2025-10-01 04:43:25.601195', 'step': 18863, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:25.654729', 'step': 18863, 'epoch': 3} {'type': 'loss', 'content': 0.07451093196868896, 'timestamp': '2025-10-01 04:43:25.660868', 'step': 18864, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:25.714197', 'step': 18864, 'epoch': 3} {'type': 'loss', 'content': 0.1204383596777916, 'timestamp': '2025-10-01 04:43:25.716264', 'step': 18865, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:25.769290', 'step': 18865, 'epoch': 3} {'type': 'loss', 'content': 0.16050995886325836, 'timestamp': '2025-10-01 04:43:25.771484', 'step': 18866, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:43:25.825634', 'step': 18866, 'epoch': 3} {'type': 'loss', 'content': 0.12220723181962967, 'timestamp': '2025-10-01 04:43:25.828013', 'step': 18867, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:25.881048', 'step': 18867, 'epoch': 3} {'type': 'loss', 'content': 0.129397451877594, 'timestamp': '2025-10-01 04:43:25.887033', 'step': 18868, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:43:25.940332', 'step': 18868, 'epoch': 3} {'type': 'loss', 'content': 0.07716894149780273, 'timestamp': '2025-10-01 04:43:25.942500', 'step': 18869, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:25.997733', 'step': 18869, 'epoch': 3} {'type': 'loss', 'content': 0.08476296812295914, 'timestamp': '2025-10-01 04:43:25.999956', 'step': 18870, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:26.055094', 'step': 18870, 'epoch': 3} {'type': 'loss', 'content': 0.17327472567558289, 'timestamp': '2025-10-01 04:43:26.057402', 'step': 18871, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:26.111103', 'step': 18871, 'epoch': 3} {'type': 'loss', 'content': 0.058070264756679535, 'timestamp': '2025-10-01 04:43:26.123027', 'step': 18872, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:26.176666', 'step': 18872, 'epoch': 3} {'type': 'loss', 'content': 0.11796680092811584, 'timestamp': '2025-10-01 04:43:26.179030', 'step': 18873, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:26.233123', 'step': 18873, 'epoch': 3} {'type': 'loss', 'content': 0.04154079034924507, 'timestamp': '2025-10-01 04:43:26.235423', 'step': 18874, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:26.288938', 'step': 18874, 'epoch': 3} {'type': 'loss', 'content': 0.05155983194708824, 'timestamp': '2025-10-01 04:43:26.291248', 'step': 18875, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:26.344297', 'step': 18875, 'epoch': 3} {'type': 'loss', 'content': 0.041608501225709915, 'timestamp': '2025-10-01 04:43:26.350381', 'step': 18876, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:26.403613', 'step': 18876, 'epoch': 3} {'type': 'loss', 'content': 0.071692556142807, 'timestamp': '2025-10-01 04:43:26.405778', 'step': 18877, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:26.461156', 'step': 18877, 'epoch': 3} {'type': 'loss', 'content': 0.10988341271877289, 'timestamp': '2025-10-01 04:43:26.467691', 'step': 18878, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:26.524200', 'step': 18878, 'epoch': 3} {'type': 'loss', 'content': 0.0792904645204544, 'timestamp': '2025-10-01 04:43:26.527317', 'step': 18879, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:26.584428', 'step': 18879, 'epoch': 3} {'type': 'loss', 'content': 0.04446171596646309, 'timestamp': '2025-10-01 04:43:26.590698', 'step': 18880, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:26.644072', 'step': 18880, 'epoch': 3} {'type': 'loss', 'content': 0.13254314661026, 'timestamp': '2025-10-01 04:43:26.646417', 'step': 18881, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:26.699800', 'step': 18881, 'epoch': 3} {'type': 'loss', 'content': 0.0863618329167366, 'timestamp': '2025-10-01 04:43:26.701809', 'step': 18882, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:26.755648', 'step': 18882, 'epoch': 3} {'type': 'loss', 'content': 0.11311694234609604, 'timestamp': '2025-10-01 04:43:26.757866', 'step': 18883, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:26.811561', 'step': 18883, 'epoch': 3} {'type': 'loss', 'content': 0.07414181530475616, 'timestamp': '2025-10-01 04:43:26.817769', 'step': 18884, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:26.871617', 'step': 18884, 'epoch': 3} {'type': 'loss', 'content': 0.0688081905245781, 'timestamp': '2025-10-01 04:43:26.873943', 'step': 18885, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:26.929631', 'step': 18885, 'epoch': 3} {'type': 'loss', 'content': 0.173720121383667, 'timestamp': '2025-10-01 04:43:26.931925', 'step': 18886, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:26.996050', 'step': 18886, 'epoch': 3} {'type': 'loss', 'content': 0.03702202066779137, 'timestamp': '2025-10-01 04:43:27.005941', 'step': 18887, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:27.061347', 'step': 18887, 'epoch': 3} {'type': 'loss', 'content': 0.05865757167339325, 'timestamp': '2025-10-01 04:43:27.068017', 'step': 18888, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:27.121597', 'step': 18888, 'epoch': 3} {'type': 'loss', 'content': 0.16626477241516113, 'timestamp': '2025-10-01 04:43:27.123837', 'step': 18889, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:27.177716', 'step': 18889, 'epoch': 3} {'type': 'loss', 'content': 0.1819661408662796, 'timestamp': '2025-10-01 04:43:27.179779', 'step': 18890, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:43:27.233144', 'step': 18890, 'epoch': 3} {'type': 'loss', 'content': 0.08048734813928604, 'timestamp': '2025-10-01 04:43:27.235934', 'step': 18891, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:27.288970', 'step': 18891, 'epoch': 3} {'type': 'loss', 'content': 0.08543629944324493, 'timestamp': '2025-10-01 04:43:27.295017', 'step': 18892, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:27.347196', 'step': 18892, 'epoch': 3} {'type': 'loss', 'content': 0.10058954358100891, 'timestamp': '2025-10-01 04:43:27.349364', 'step': 18893, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:27.402218', 'step': 18893, 'epoch': 3} {'type': 'loss', 'content': 0.07542714476585388, 'timestamp': '2025-10-01 04:43:27.408966', 'step': 18894, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:27.462397', 'step': 18894, 'epoch': 3} {'type': 'loss', 'content': 0.05417626351118088, 'timestamp': '2025-10-01 04:43:27.464570', 'step': 18895, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:27.517454', 'step': 18895, 'epoch': 3} {'type': 'loss', 'content': 0.06389261037111282, 'timestamp': '2025-10-01 04:43:27.523347', 'step': 18896, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:27.575957', 'step': 18896, 'epoch': 3} {'type': 'loss', 'content': 0.13810808956623077, 'timestamp': '2025-10-01 04:43:27.578254', 'step': 18897, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:27.632507', 'step': 18897, 'epoch': 3} {'type': 'loss', 'content': 0.07988619059324265, 'timestamp': '2025-10-01 04:43:27.634920', 'step': 18898, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:27.688049', 'step': 18898, 'epoch': 3} {'type': 'loss', 'content': 0.12461791932582855, 'timestamp': '2025-10-01 04:43:27.690160', 'step': 18899, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:27.743450', 'step': 18899, 'epoch': 3} {'type': 'loss', 'content': 0.05601464956998825, 'timestamp': '2025-10-01 04:43:27.750928', 'step': 18900, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:27.803342', 'step': 18900, 'epoch': 3} {'type': 'loss', 'content': 0.05705661326646805, 'timestamp': '2025-10-01 04:43:27.805476', 'step': 18901, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:43:27.859032', 'step': 18901, 'epoch': 3} {'type': 'loss', 'content': 0.08096452802419662, 'timestamp': '2025-10-01 04:43:27.861357', 'step': 18902, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:27.915635', 'step': 18902, 'epoch': 3} {'type': 'loss', 'content': 0.0944347009062767, 'timestamp': '2025-10-01 04:43:27.922232', 'step': 18903, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:27.975793', 'step': 18903, 'epoch': 3} {'type': 'loss', 'content': 0.04566546902060509, 'timestamp': '2025-10-01 04:43:27.981911', 'step': 18904, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:28.037979', 'step': 18904, 'epoch': 3} {'type': 'loss', 'content': 0.0702582597732544, 'timestamp': '2025-10-01 04:43:28.042460', 'step': 18905, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:28.098413', 'step': 18905, 'epoch': 3} {'type': 'loss', 'content': 0.042178086936473846, 'timestamp': '2025-10-01 04:43:28.100540', 'step': 18906, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:28.154576', 'step': 18906, 'epoch': 3} {'type': 'loss', 'content': 0.08042104542255402, 'timestamp': '2025-10-01 04:43:28.156793', 'step': 18907, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:28.210122', 'step': 18907, 'epoch': 3} {'type': 'loss', 'content': 0.05553648620843887, 'timestamp': '2025-10-01 04:43:28.216188', 'step': 18908, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:28.269024', 'step': 18908, 'epoch': 3} {'type': 'loss', 'content': 0.10449283570051193, 'timestamp': '2025-10-01 04:43:28.273068', 'step': 18909, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:43:28.332402', 'step': 18909, 'epoch': 3} {'type': 'loss', 'content': 0.06982393562793732, 'timestamp': '2025-10-01 04:43:28.334479', 'step': 18910, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:28.388253', 'step': 18910, 'epoch': 3} {'type': 'loss', 'content': 0.0888478010892868, 'timestamp': '2025-10-01 04:43:28.390381', 'step': 18911, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:28.443486', 'step': 18911, 'epoch': 3} {'type': 'loss', 'content': 0.03765328228473663, 'timestamp': '2025-10-01 04:43:28.449523', 'step': 18912, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:28.502281', 'step': 18912, 'epoch': 3} {'type': 'loss', 'content': 0.03661329671740532, 'timestamp': '2025-10-01 04:43:28.504487', 'step': 18913, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:28.557283', 'step': 18913, 'epoch': 3} {'type': 'loss', 'content': 0.04224979504942894, 'timestamp': '2025-10-01 04:43:28.560066', 'step': 18914, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:28.615964', 'step': 18914, 'epoch': 3} {'type': 'loss', 'content': 0.04899483546614647, 'timestamp': '2025-10-01 04:43:28.618434', 'step': 18915, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:28.672590', 'step': 18915, 'epoch': 3} {'type': 'loss', 'content': 0.11600812524557114, 'timestamp': '2025-10-01 04:43:28.678032', 'step': 18916, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:28.732950', 'step': 18916, 'epoch': 3} {'type': 'loss', 'content': 0.052638374269008636, 'timestamp': '2025-10-01 04:43:28.736052', 'step': 18917, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:28.788856', 'step': 18917, 'epoch': 3} {'type': 'loss', 'content': 0.16333311796188354, 'timestamp': '2025-10-01 04:43:28.793908', 'step': 18918, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:28.851556', 'step': 18918, 'epoch': 3} {'type': 'loss', 'content': 0.07449135184288025, 'timestamp': '2025-10-01 04:43:28.853625', 'step': 18919, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:43:28.913537', 'step': 18919, 'epoch': 3} {'type': 'loss', 'content': 0.2047237604856491, 'timestamp': '2025-10-01 04:43:28.919196', 'step': 18920, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:28.971421', 'step': 18920, 'epoch': 3} {'type': 'loss', 'content': 0.08807931840419769, 'timestamp': '2025-10-01 04:43:28.973620', 'step': 18921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:29.031111', 'step': 18921, 'epoch': 3} {'type': 'loss', 'content': 0.037188123911619186, 'timestamp': '2025-10-01 04:43:29.033275', 'step': 18922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:29.086733', 'step': 18922, 'epoch': 3} {'type': 'loss', 'content': 0.07024671137332916, 'timestamp': '2025-10-01 04:43:29.088410', 'step': 18923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:29.141557', 'step': 18923, 'epoch': 3} {'type': 'loss', 'content': 0.05309819430112839, 'timestamp': '2025-10-01 04:43:29.147097', 'step': 18924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:29.202641', 'step': 18924, 'epoch': 3} {'type': 'loss', 'content': 0.13926252722740173, 'timestamp': '2025-10-01 04:43:29.204650', 'step': 18925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:29.258497', 'step': 18925, 'epoch': 3} {'type': 'loss', 'content': 0.1032652035355568, 'timestamp': '2025-10-01 04:43:29.260792', 'step': 18926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:29.314396', 'step': 18926, 'epoch': 3} {'type': 'loss', 'content': 0.09654944390058517, 'timestamp': '2025-10-01 04:43:29.316598', 'step': 18927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:29.371195', 'step': 18927, 'epoch': 3} {'type': 'loss', 'content': 0.08088692277669907, 'timestamp': '2025-10-01 04:43:29.377087', 'step': 18928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:29.430918', 'step': 18928, 'epoch': 3} {'type': 'loss', 'content': 0.1727127730846405, 'timestamp': '2025-10-01 04:43:29.433495', 'step': 18929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:29.486560', 'step': 18929, 'epoch': 3} {'type': 'loss', 'content': 0.058390479534864426, 'timestamp': '2025-10-01 04:43:29.488501', 'step': 18930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:43:29.542255', 'step': 18930, 'epoch': 3} {'type': 'loss', 'content': 0.05219360068440437, 'timestamp': '2025-10-01 04:43:29.546086', 'step': 18931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:29.600105', 'step': 18931, 'epoch': 3} {'type': 'loss', 'content': 0.07779314368963242, 'timestamp': '2025-10-01 04:43:29.605565', 'step': 18932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:29.657927', 'step': 18932, 'epoch': 3} {'type': 'loss', 'content': 0.16848815977573395, 'timestamp': '2025-10-01 04:43:29.660032', 'step': 18933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:29.712584', 'step': 18933, 'epoch': 3} {'type': 'loss', 'content': 0.09278446435928345, 'timestamp': '2025-10-01 04:43:29.715147', 'step': 18934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:29.767975', 'step': 18934, 'epoch': 3} {'type': 'loss', 'content': 0.10460071265697479, 'timestamp': '2025-10-01 04:43:29.770534', 'step': 18935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:29.823362', 'step': 18935, 'epoch': 3} {'type': 'loss', 'content': 0.08664686232805252, 'timestamp': '2025-10-01 04:43:29.829110', 'step': 18936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:29.881542', 'step': 18936, 'epoch': 3} {'type': 'loss', 'content': 0.04660818725824356, 'timestamp': '2025-10-01 04:43:29.883782', 'step': 18937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:29.937477', 'step': 18937, 'epoch': 3} {'type': 'loss', 'content': 0.1637192815542221, 'timestamp': '2025-10-01 04:43:29.939420', 'step': 18938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:29.992487', 'step': 18938, 'epoch': 3} {'type': 'loss', 'content': 0.08639756590127945, 'timestamp': '2025-10-01 04:43:30.000506', 'step': 18939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:30.053336', 'step': 18939, 'epoch': 3} {'type': 'loss', 'content': 0.026605386286973953, 'timestamp': '2025-10-01 04:43:30.059381', 'step': 18940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:30.112384', 'step': 18940, 'epoch': 3} {'type': 'loss', 'content': 0.11554086208343506, 'timestamp': '2025-10-01 04:43:30.114469', 'step': 18941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:30.176863', 'step': 18941, 'epoch': 3} {'type': 'loss', 'content': 0.22410987317562103, 'timestamp': '2025-10-01 04:43:30.179045', 'step': 18942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:30.232212', 'step': 18942, 'epoch': 3} {'type': 'loss', 'content': 0.09745380282402039, 'timestamp': '2025-10-01 04:43:30.234358', 'step': 18943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:30.287358', 'step': 18943, 'epoch': 3} {'type': 'loss', 'content': 0.08042389899492264, 'timestamp': '2025-10-01 04:43:30.292771', 'step': 18944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:30.345611', 'step': 18944, 'epoch': 3} {'type': 'loss', 'content': 0.07433098554611206, 'timestamp': '2025-10-01 04:43:30.347406', 'step': 18945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:30.400707', 'step': 18945, 'epoch': 3} {'type': 'loss', 'content': 0.06772740930318832, 'timestamp': '2025-10-01 04:43:30.402771', 'step': 18946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:30.455661', 'step': 18946, 'epoch': 3} {'type': 'loss', 'content': 0.04084109142422676, 'timestamp': '2025-10-01 04:43:30.457948', 'step': 18947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:30.510918', 'step': 18947, 'epoch': 3} {'type': 'loss', 'content': 0.09585075825452805, 'timestamp': '2025-10-01 04:43:30.516727', 'step': 18948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:30.568762', 'step': 18948, 'epoch': 3} {'type': 'loss', 'content': 0.10322640091180801, 'timestamp': '2025-10-01 04:43:30.570812', 'step': 18949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:30.630112', 'step': 18949, 'epoch': 3} {'type': 'loss', 'content': 0.09112009406089783, 'timestamp': '2025-10-01 04:43:30.632331', 'step': 18950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:30.686396', 'step': 18950, 'epoch': 3} {'type': 'loss', 'content': 0.0495939664542675, 'timestamp': '2025-10-01 04:43:30.688212', 'step': 18951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:30.741247', 'step': 18951, 'epoch': 3} {'type': 'loss', 'content': 0.10999336838722229, 'timestamp': '2025-10-01 04:43:30.746543', 'step': 18952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:30.799216', 'step': 18952, 'epoch': 3} {'type': 'loss', 'content': 0.10326024144887924, 'timestamp': '2025-10-01 04:43:30.805660', 'step': 18953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:30.858752', 'step': 18953, 'epoch': 3} {'type': 'loss', 'content': 0.06662391126155853, 'timestamp': '2025-10-01 04:43:30.860634', 'step': 18954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:30.913616', 'step': 18954, 'epoch': 3} {'type': 'loss', 'content': 0.06249785050749779, 'timestamp': '2025-10-01 04:43:30.915841', 'step': 18955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:43:30.969937', 'step': 18955, 'epoch': 3} {'type': 'loss', 'content': 0.09694519639015198, 'timestamp': '2025-10-01 04:43:30.976076', 'step': 18956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:31.030142', 'step': 18956, 'epoch': 3} {'type': 'loss', 'content': 0.10183995217084885, 'timestamp': '2025-10-01 04:43:31.032212', 'step': 18957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:31.086310', 'step': 18957, 'epoch': 3} {'type': 'loss', 'content': 0.05636683106422424, 'timestamp': '2025-10-01 04:43:31.096360', 'step': 18958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:31.149918', 'step': 18958, 'epoch': 3} {'type': 'loss', 'content': 0.10210560262203217, 'timestamp': '2025-10-01 04:43:31.151696', 'step': 18959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:31.206206', 'step': 18959, 'epoch': 3} {'type': 'loss', 'content': 0.041840214282274246, 'timestamp': '2025-10-01 04:43:31.212193', 'step': 18960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:31.266576', 'step': 18960, 'epoch': 3} {'type': 'loss', 'content': 0.13366802036762238, 'timestamp': '2025-10-01 04:43:31.271603', 'step': 18961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:31.331418', 'step': 18961, 'epoch': 3} {'type': 'loss', 'content': 0.0694114938378334, 'timestamp': '2025-10-01 04:43:31.333619', 'step': 18962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:31.386700', 'step': 18962, 'epoch': 3} {'type': 'loss', 'content': 0.1325983852148056, 'timestamp': '2025-10-01 04:43:31.388857', 'step': 18963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:31.441859', 'step': 18963, 'epoch': 3} {'type': 'loss', 'content': 0.08589213341474533, 'timestamp': '2025-10-01 04:43:31.448316', 'step': 18964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:31.504651', 'step': 18964, 'epoch': 3} {'type': 'loss', 'content': 0.06913985311985016, 'timestamp': '2025-10-01 04:43:31.507196', 'step': 18965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:31.567601', 'step': 18965, 'epoch': 3} {'type': 'loss', 'content': 0.023996487259864807, 'timestamp': '2025-10-01 04:43:31.569494', 'step': 18966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:31.622574', 'step': 18966, 'epoch': 3} {'type': 'loss', 'content': 0.053894512355327606, 'timestamp': '2025-10-01 04:43:31.624366', 'step': 18967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:31.677464', 'step': 18967, 'epoch': 3} {'type': 'loss', 'content': 0.12015238404273987, 'timestamp': '2025-10-01 04:43:31.683407', 'step': 18968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:31.737013', 'step': 18968, 'epoch': 3} {'type': 'loss', 'content': 0.06317153573036194, 'timestamp': '2025-10-01 04:43:31.739331', 'step': 18969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:31.793900', 'step': 18969, 'epoch': 3} {'type': 'loss', 'content': 0.16645486652851105, 'timestamp': '2025-10-01 04:43:31.798637', 'step': 18970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:31.854834', 'step': 18970, 'epoch': 3} {'type': 'loss', 'content': 0.062987320125103, 'timestamp': '2025-10-01 04:43:31.857001', 'step': 18971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:31.909867', 'step': 18971, 'epoch': 3} {'type': 'loss', 'content': 0.10326774418354034, 'timestamp': '2025-10-01 04:43:31.915709', 'step': 18972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:31.968401', 'step': 18972, 'epoch': 3} {'type': 'loss', 'content': 0.05378245934844017, 'timestamp': '2025-10-01 04:43:31.973419', 'step': 18973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:32.030532', 'step': 18973, 'epoch': 3} {'type': 'loss', 'content': 0.12317708134651184, 'timestamp': '2025-10-01 04:43:32.032645', 'step': 18974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:43:32.085802', 'step': 18974, 'epoch': 3} {'type': 'loss', 'content': 0.07890180498361588, 'timestamp': '2025-10-01 04:43:32.088660', 'step': 18975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:32.142415', 'step': 18975, 'epoch': 3} {'type': 'loss', 'content': 0.06392098218202591, 'timestamp': '2025-10-01 04:43:32.151118', 'step': 18976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:32.204605', 'step': 18976, 'epoch': 3} {'type': 'loss', 'content': 0.12416142225265503, 'timestamp': '2025-10-01 04:43:32.206771', 'step': 18977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:32.260919', 'step': 18977, 'epoch': 3} {'type': 'loss', 'content': 0.10205185413360596, 'timestamp': '2025-10-01 04:43:32.263696', 'step': 18978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:32.317212', 'step': 18978, 'epoch': 3} {'type': 'loss', 'content': 0.11312051117420197, 'timestamp': '2025-10-01 04:43:32.319254', 'step': 18979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:32.372154', 'step': 18979, 'epoch': 3} {'type': 'loss', 'content': 0.136287659406662, 'timestamp': '2025-10-01 04:43:32.377420', 'step': 18980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:32.432153', 'step': 18980, 'epoch': 3} {'type': 'loss', 'content': 0.15954770147800446, 'timestamp': '2025-10-01 04:43:32.434016', 'step': 18981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:32.487606', 'step': 18981, 'epoch': 3} {'type': 'loss', 'content': 0.13426627218723297, 'timestamp': '2025-10-01 04:43:32.490249', 'step': 18982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:32.544481', 'step': 18982, 'epoch': 3} {'type': 'loss', 'content': 0.11985327303409576, 'timestamp': '2025-10-01 04:43:32.547192', 'step': 18983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:32.601648', 'step': 18983, 'epoch': 3} {'type': 'loss', 'content': 0.15704037249088287, 'timestamp': '2025-10-01 04:43:32.608935', 'step': 18984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:32.666215', 'step': 18984, 'epoch': 3} {'type': 'loss', 'content': 0.08077580481767654, 'timestamp': '2025-10-01 04:43:32.668793', 'step': 18985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:32.723301', 'step': 18985, 'epoch': 3} {'type': 'loss', 'content': 0.07557978481054306, 'timestamp': '2025-10-01 04:43:32.725572', 'step': 18986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:32.781975', 'step': 18986, 'epoch': 3} {'type': 'loss', 'content': 0.05831034854054451, 'timestamp': '2025-10-01 04:43:32.784524', 'step': 18987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:32.839360', 'step': 18987, 'epoch': 3} {'type': 'loss', 'content': 0.061408881098032, 'timestamp': '2025-10-01 04:43:32.845548', 'step': 18988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:32.899455', 'step': 18988, 'epoch': 3} {'type': 'loss', 'content': 0.14867478609085083, 'timestamp': '2025-10-01 04:43:32.901970', 'step': 18989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:43:32.956640', 'step': 18989, 'epoch': 3} {'type': 'loss', 'content': 0.07238593697547913, 'timestamp': '2025-10-01 04:43:32.959271', 'step': 18990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:33.013367', 'step': 18990, 'epoch': 3} {'type': 'loss', 'content': 0.08040410280227661, 'timestamp': '2025-10-01 04:43:33.015858', 'step': 18991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:33.073447', 'step': 18991, 'epoch': 3} {'type': 'loss', 'content': 0.10279391705989838, 'timestamp': '2025-10-01 04:43:33.079343', 'step': 18992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:33.132850', 'step': 18992, 'epoch': 3} {'type': 'loss', 'content': 0.08569953590631485, 'timestamp': '2025-10-01 04:43:33.135314', 'step': 18993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:33.189657', 'step': 18993, 'epoch': 3} {'type': 'loss', 'content': 0.054002195596694946, 'timestamp': '2025-10-01 04:43:33.193300', 'step': 18994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:43:33.247134', 'step': 18994, 'epoch': 3} {'type': 'loss', 'content': 0.08424805849790573, 'timestamp': '2025-10-01 04:43:33.249267', 'step': 18995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:33.302871', 'step': 18995, 'epoch': 3} {'type': 'loss', 'content': 0.09506379067897797, 'timestamp': '2025-10-01 04:43:33.309247', 'step': 18996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:33.363834', 'step': 18996, 'epoch': 3} {'type': 'loss', 'content': 0.097171351313591, 'timestamp': '2025-10-01 04:43:33.367924', 'step': 18997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:33.421840', 'step': 18997, 'epoch': 3} {'type': 'loss', 'content': 0.05928272008895874, 'timestamp': '2025-10-01 04:43:33.424025', 'step': 18998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:33.477788', 'step': 18998, 'epoch': 3} {'type': 'loss', 'content': 0.04211675748229027, 'timestamp': '2025-10-01 04:43:33.480460', 'step': 18999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:33.534523', 'step': 18999, 'epoch': 3} {'type': 'loss', 'content': 0.13712434470653534, 'timestamp': '2025-10-01 04:43:33.540613', 'step': 19000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 19000', 'timestamp': '2025-10-01 04:43:33.928423', 'step': 19000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:33.984400', 'step': 19000, 'epoch': 3} {'type': 'loss', 'content': 0.06639282405376434, 'timestamp': '2025-10-01 04:43:33.986589', 'step': 19001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:34.040400', 'step': 19001, 'epoch': 3} {'type': 'loss', 'content': 0.10764708369970322, 'timestamp': '2025-10-01 04:43:34.042662', 'step': 19002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:34.097877', 'step': 19002, 'epoch': 3} {'type': 'loss', 'content': 0.07776424288749695, 'timestamp': '2025-10-01 04:43:34.100558', 'step': 19003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:34.153876', 'step': 19003, 'epoch': 3} {'type': 'loss', 'content': 0.09539153426885605, 'timestamp': '2025-10-01 04:43:34.160019', 'step': 19004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:34.212577', 'step': 19004, 'epoch': 3} {'type': 'loss', 'content': 0.10629161447286606, 'timestamp': '2025-10-01 04:43:34.214811', 'step': 19005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:34.268390', 'step': 19005, 'epoch': 3} {'type': 'loss', 'content': 0.07509089261293411, 'timestamp': '2025-10-01 04:43:34.270557', 'step': 19006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:34.324907', 'step': 19006, 'epoch': 3} {'type': 'loss', 'content': 0.11553407460451126, 'timestamp': '2025-10-01 04:43:34.327110', 'step': 19007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:34.380951', 'step': 19007, 'epoch': 3} {'type': 'loss', 'content': 0.10178620368242264, 'timestamp': '2025-10-01 04:43:34.387490', 'step': 19008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:34.441685', 'step': 19008, 'epoch': 3} {'type': 'loss', 'content': 0.05896192416548729, 'timestamp': '2025-10-01 04:43:34.443747', 'step': 19009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:34.497160', 'step': 19009, 'epoch': 3} {'type': 'loss', 'content': 0.03998207300901413, 'timestamp': '2025-10-01 04:43:34.499275', 'step': 19010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:34.553310', 'step': 19010, 'epoch': 3} {'type': 'loss', 'content': 0.019202567636966705, 'timestamp': '2025-10-01 04:43:34.555457', 'step': 19011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:43:34.609012', 'step': 19011, 'epoch': 3} {'type': 'loss', 'content': 0.03763268142938614, 'timestamp': '2025-10-01 04:43:34.614947', 'step': 19012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:34.667892', 'step': 19012, 'epoch': 3} {'type': 'loss', 'content': 0.044744741171598434, 'timestamp': '2025-10-01 04:43:34.669935', 'step': 19013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:34.723721', 'step': 19013, 'epoch': 3} {'type': 'loss', 'content': 0.05227839946746826, 'timestamp': '2025-10-01 04:43:34.725889', 'step': 19014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:34.779376', 'step': 19014, 'epoch': 3} {'type': 'loss', 'content': 0.07642378658056259, 'timestamp': '2025-10-01 04:43:34.781747', 'step': 19015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:34.835311', 'step': 19015, 'epoch': 3} {'type': 'loss', 'content': 0.11876553297042847, 'timestamp': '2025-10-01 04:43:34.841549', 'step': 19016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:34.901598', 'step': 19016, 'epoch': 3} {'type': 'loss', 'content': 0.05816512182354927, 'timestamp': '2025-10-01 04:43:34.903894', 'step': 19017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:43:34.957360', 'step': 19017, 'epoch': 3} {'type': 'loss', 'content': 0.0880264937877655, 'timestamp': '2025-10-01 04:43:34.965662', 'step': 19018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:35.019160', 'step': 19018, 'epoch': 3} {'type': 'loss', 'content': 0.11560821533203125, 'timestamp': '2025-10-01 04:43:35.021301', 'step': 19019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:35.075680', 'step': 19019, 'epoch': 3} {'type': 'loss', 'content': 0.03507663309574127, 'timestamp': '2025-10-01 04:43:35.081552', 'step': 19020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:35.134128', 'step': 19020, 'epoch': 3} {'type': 'loss', 'content': 0.038968976587057114, 'timestamp': '2025-10-01 04:43:35.140608', 'step': 19021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:35.193683', 'step': 19021, 'epoch': 3} {'type': 'loss', 'content': 0.0827663242816925, 'timestamp': '2025-10-01 04:43:35.195873', 'step': 19022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:35.249950', 'step': 19022, 'epoch': 3} {'type': 'loss', 'content': 0.12816694378852844, 'timestamp': '2025-10-01 04:43:35.252872', 'step': 19023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:35.306354', 'step': 19023, 'epoch': 3} {'type': 'loss', 'content': 0.061540085822343826, 'timestamp': '2025-10-01 04:43:35.313213', 'step': 19024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:35.366038', 'step': 19024, 'epoch': 3} {'type': 'loss', 'content': 0.08478430658578873, 'timestamp': '2025-10-01 04:43:35.368260', 'step': 19025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:35.422152', 'step': 19025, 'epoch': 3} {'type': 'loss', 'content': 0.09003692865371704, 'timestamp': '2025-10-01 04:43:35.425153', 'step': 19026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:35.478882', 'step': 19026, 'epoch': 3} {'type': 'loss', 'content': 0.09901285916566849, 'timestamp': '2025-10-01 04:43:35.481447', 'step': 19027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:35.534774', 'step': 19027, 'epoch': 3} {'type': 'loss', 'content': 0.09021726250648499, 'timestamp': '2025-10-01 04:43:35.541100', 'step': 19028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:35.593794', 'step': 19028, 'epoch': 3} {'type': 'loss', 'content': 0.09919065237045288, 'timestamp': '2025-10-01 04:43:35.595823', 'step': 19029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:35.648856', 'step': 19029, 'epoch': 3} {'type': 'loss', 'content': 0.08571343868970871, 'timestamp': '2025-10-01 04:43:35.651181', 'step': 19030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:35.704494', 'step': 19030, 'epoch': 3} {'type': 'loss', 'content': 0.1576603353023529, 'timestamp': '2025-10-01 04:43:35.707568', 'step': 19031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:35.761970', 'step': 19031, 'epoch': 3} {'type': 'loss', 'content': 0.06333661824464798, 'timestamp': '2025-10-01 04:43:35.767773', 'step': 19032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:35.820645', 'step': 19032, 'epoch': 3} {'type': 'loss', 'content': 0.11142710596323013, 'timestamp': '2025-10-01 04:43:35.822830', 'step': 19033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:43:35.881642', 'step': 19033, 'epoch': 3} {'type': 'loss', 'content': 0.06564849615097046, 'timestamp': '2025-10-01 04:43:35.883989', 'step': 19034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:35.937440', 'step': 19034, 'epoch': 3} {'type': 'loss', 'content': 0.09534887969493866, 'timestamp': '2025-10-01 04:43:35.939580', 'step': 19035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:43:36.000526', 'step': 19035, 'epoch': 3} {'type': 'loss', 'content': 0.0077314237132668495, 'timestamp': '2025-10-01 04:43:36.006493', 'step': 19036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:36.059410', 'step': 19036, 'epoch': 3} {'type': 'loss', 'content': 0.11391353607177734, 'timestamp': '2025-10-01 04:43:36.061492', 'step': 19037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:36.114604', 'step': 19037, 'epoch': 3} {'type': 'loss', 'content': 0.05128583312034607, 'timestamp': '2025-10-01 04:43:36.116794', 'step': 19038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:36.169789', 'step': 19038, 'epoch': 3} {'type': 'loss', 'content': 0.08919114619493484, 'timestamp': '2025-10-01 04:43:36.171899', 'step': 19039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:36.224710', 'step': 19039, 'epoch': 3} {'type': 'loss', 'content': 0.04016521945595741, 'timestamp': '2025-10-01 04:43:36.230429', 'step': 19040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:36.282697', 'step': 19040, 'epoch': 3} {'type': 'loss', 'content': 0.13058356940746307, 'timestamp': '2025-10-01 04:43:36.284807', 'step': 19041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:36.338303', 'step': 19041, 'epoch': 3} {'type': 'loss', 'content': 0.062226228415966034, 'timestamp': '2025-10-01 04:43:36.342586', 'step': 19042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:36.396133', 'step': 19042, 'epoch': 3} {'type': 'loss', 'content': 0.0841173380613327, 'timestamp': '2025-10-01 04:43:36.398302', 'step': 19043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:36.451777', 'step': 19043, 'epoch': 3} {'type': 'loss', 'content': 0.09918621182441711, 'timestamp': '2025-10-01 04:43:36.457526', 'step': 19044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:36.510541', 'step': 19044, 'epoch': 3} {'type': 'loss', 'content': 0.11531190574169159, 'timestamp': '2025-10-01 04:43:36.513062', 'step': 19045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:36.566861', 'step': 19045, 'epoch': 3} {'type': 'loss', 'content': 0.06837157160043716, 'timestamp': '2025-10-01 04:43:36.569364', 'step': 19046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:36.630132', 'step': 19046, 'epoch': 3} {'type': 'loss', 'content': 0.08720242232084274, 'timestamp': '2025-10-01 04:43:36.632989', 'step': 19047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:43:36.692216', 'step': 19047, 'epoch': 3} {'type': 'loss', 'content': 0.04795985668897629, 'timestamp': '2025-10-01 04:43:36.705171', 'step': 19048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:36.759291', 'step': 19048, 'epoch': 3} {'type': 'loss', 'content': 0.04043417051434517, 'timestamp': '2025-10-01 04:43:36.761425', 'step': 19049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:36.816067', 'step': 19049, 'epoch': 3} {'type': 'loss', 'content': 0.13694703578948975, 'timestamp': '2025-10-01 04:43:36.818214', 'step': 19050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:36.873685', 'step': 19050, 'epoch': 3} {'type': 'loss', 'content': 0.12363428622484207, 'timestamp': '2025-10-01 04:43:36.875631', 'step': 19051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:36.929777', 'step': 19051, 'epoch': 3} {'type': 'loss', 'content': 0.06447508186101913, 'timestamp': '2025-10-01 04:43:36.936153', 'step': 19052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:36.990348', 'step': 19052, 'epoch': 3} {'type': 'loss', 'content': 0.1013525202870369, 'timestamp': '2025-10-01 04:43:36.992521', 'step': 19053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:37.046608', 'step': 19053, 'epoch': 3} {'type': 'loss', 'content': 0.11143644899129868, 'timestamp': '2025-10-01 04:43:37.048783', 'step': 19054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:37.102705', 'step': 19054, 'epoch': 3} {'type': 'loss', 'content': 0.07123492658138275, 'timestamp': '2025-10-01 04:43:37.105086', 'step': 19055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:37.158687', 'step': 19055, 'epoch': 3} {'type': 'loss', 'content': 0.1172967180609703, 'timestamp': '2025-10-01 04:43:37.165005', 'step': 19056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:37.217725', 'step': 19056, 'epoch': 3} {'type': 'loss', 'content': 0.09146995842456818, 'timestamp': '2025-10-01 04:43:37.219595', 'step': 19057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:37.272463', 'step': 19057, 'epoch': 3} {'type': 'loss', 'content': 0.14424389600753784, 'timestamp': '2025-10-01 04:43:37.274726', 'step': 19058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:37.328495', 'step': 19058, 'epoch': 3} {'type': 'loss', 'content': 0.11328138411045074, 'timestamp': '2025-10-01 04:43:37.330781', 'step': 19059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:37.384385', 'step': 19059, 'epoch': 3} {'type': 'loss', 'content': 0.05737932771444321, 'timestamp': '2025-10-01 04:43:37.392746', 'step': 19060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:37.446608', 'step': 19060, 'epoch': 3} {'type': 'loss', 'content': 0.018680695444345474, 'timestamp': '2025-10-01 04:43:37.448677', 'step': 19061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:37.501570', 'step': 19061, 'epoch': 3} {'type': 'loss', 'content': 0.025099964812397957, 'timestamp': '2025-10-01 04:43:37.503685', 'step': 19062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:37.557112', 'step': 19062, 'epoch': 3} {'type': 'loss', 'content': 0.08239493519067764, 'timestamp': '2025-10-01 04:43:37.559385', 'step': 19063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:37.612490', 'step': 19063, 'epoch': 3} {'type': 'loss', 'content': 0.03983119875192642, 'timestamp': '2025-10-01 04:43:37.618291', 'step': 19064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:37.670652', 'step': 19064, 'epoch': 3} {'type': 'loss', 'content': 0.06811287999153137, 'timestamp': '2025-10-01 04:43:37.672810', 'step': 19065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:37.726343', 'step': 19065, 'epoch': 3} {'type': 'loss', 'content': 0.08315721154212952, 'timestamp': '2025-10-01 04:43:37.728467', 'step': 19066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:43:37.781933', 'step': 19066, 'epoch': 3} {'type': 'loss', 'content': 0.07293469458818436, 'timestamp': '2025-10-01 04:43:37.784213', 'step': 19067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:37.837467', 'step': 19067, 'epoch': 3} {'type': 'loss', 'content': 0.05818468704819679, 'timestamp': '2025-10-01 04:43:37.843865', 'step': 19068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:43:37.904117', 'step': 19068, 'epoch': 3} {'type': 'loss', 'content': 0.04766453057527542, 'timestamp': '2025-10-01 04:43:37.906348', 'step': 19069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:37.960233', 'step': 19069, 'epoch': 3} {'type': 'loss', 'content': 0.07286302745342255, 'timestamp': '2025-10-01 04:43:37.963347', 'step': 19070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:38.017333', 'step': 19070, 'epoch': 3} {'type': 'loss', 'content': 0.08695954084396362, 'timestamp': '2025-10-01 04:43:38.019457', 'step': 19071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:38.073156', 'step': 19071, 'epoch': 3} {'type': 'loss', 'content': 0.04237060993909836, 'timestamp': '2025-10-01 04:43:38.079308', 'step': 19072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:38.132580', 'step': 19072, 'epoch': 3} {'type': 'loss', 'content': 0.1023983284831047, 'timestamp': '2025-10-01 04:43:38.134764', 'step': 19073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:38.188671', 'step': 19073, 'epoch': 3} {'type': 'loss', 'content': 0.09960328787565231, 'timestamp': '2025-10-01 04:43:38.190957', 'step': 19074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:38.245352', 'step': 19074, 'epoch': 3} {'type': 'loss', 'content': 0.025619320571422577, 'timestamp': '2025-10-01 04:43:38.247601', 'step': 19075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:38.302395', 'step': 19075, 'epoch': 3} {'type': 'loss', 'content': 0.05025997385382652, 'timestamp': '2025-10-01 04:43:38.308793', 'step': 19076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:38.363723', 'step': 19076, 'epoch': 3} {'type': 'loss', 'content': 0.13042162358760834, 'timestamp': '2025-10-01 04:43:38.365856', 'step': 19077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:43:38.421084', 'step': 19077, 'epoch': 3} {'type': 'loss', 'content': 0.10715331137180328, 'timestamp': '2025-10-01 04:43:38.423263', 'step': 19078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:38.479206', 'step': 19078, 'epoch': 3} {'type': 'loss', 'content': 0.0667024552822113, 'timestamp': '2025-10-01 04:43:38.481345', 'step': 19079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:38.535709', 'step': 19079, 'epoch': 3} {'type': 'loss', 'content': 0.10261713713407516, 'timestamp': '2025-10-01 04:43:38.542161', 'step': 19080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:38.603037', 'step': 19080, 'epoch': 3} {'type': 'loss', 'content': 0.047170523554086685, 'timestamp': '2025-10-01 04:43:38.605157', 'step': 19081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:38.659777', 'step': 19081, 'epoch': 3} {'type': 'loss', 'content': 0.119171142578125, 'timestamp': '2025-10-01 04:43:38.661881', 'step': 19082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:38.717629', 'step': 19082, 'epoch': 3} {'type': 'loss', 'content': 0.12668883800506592, 'timestamp': '2025-10-01 04:43:38.719839', 'step': 19083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:43:38.774049', 'step': 19083, 'epoch': 3} {'type': 'loss', 'content': 0.034419991075992584, 'timestamp': '2025-10-01 04:43:38.780352', 'step': 19084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:38.833865', 'step': 19084, 'epoch': 3} {'type': 'loss', 'content': 0.060286425054073334, 'timestamp': '2025-10-01 04:43:38.835983', 'step': 19085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:38.889465', 'step': 19085, 'epoch': 3} {'type': 'loss', 'content': 0.08312875032424927, 'timestamp': '2025-10-01 04:43:38.891587', 'step': 19086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:38.949925', 'step': 19086, 'epoch': 3} {'type': 'loss', 'content': 0.05227602273225784, 'timestamp': '2025-10-01 04:43:38.952229', 'step': 19087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:39.012756', 'step': 19087, 'epoch': 3} {'type': 'loss', 'content': 0.0354604609310627, 'timestamp': '2025-10-01 04:43:39.020234', 'step': 19088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:43:39.080641', 'step': 19088, 'epoch': 3} {'type': 'loss', 'content': 0.03227485343813896, 'timestamp': '2025-10-01 04:43:39.088515', 'step': 19089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:39.144119', 'step': 19089, 'epoch': 3} {'type': 'loss', 'content': 0.06976684927940369, 'timestamp': '2025-10-01 04:43:39.146447', 'step': 19090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:39.200448', 'step': 19090, 'epoch': 3} {'type': 'loss', 'content': 0.032077644020318985, 'timestamp': '2025-10-01 04:43:39.202588', 'step': 19091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:39.255344', 'step': 19091, 'epoch': 3} {'type': 'loss', 'content': 0.0654105544090271, 'timestamp': '2025-10-01 04:43:39.261531', 'step': 19092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:39.314274', 'step': 19092, 'epoch': 3} {'type': 'loss', 'content': 0.06082894280552864, 'timestamp': '2025-10-01 04:43:39.316386', 'step': 19093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:39.369269', 'step': 19093, 'epoch': 3} {'type': 'loss', 'content': 0.06878252327442169, 'timestamp': '2025-10-01 04:43:39.371563', 'step': 19094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:39.425044', 'step': 19094, 'epoch': 3} {'type': 'loss', 'content': 0.038423940539360046, 'timestamp': '2025-10-01 04:43:39.427243', 'step': 19095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:39.480353', 'step': 19095, 'epoch': 3} {'type': 'loss', 'content': 0.04247643053531647, 'timestamp': '2025-10-01 04:43:39.486338', 'step': 19096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:39.539625', 'step': 19096, 'epoch': 3} {'type': 'loss', 'content': 0.09285721182823181, 'timestamp': '2025-10-01 04:43:39.542221', 'step': 19097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:39.596427', 'step': 19097, 'epoch': 3} {'type': 'loss', 'content': 0.08208408951759338, 'timestamp': '2025-10-01 04:43:39.599667', 'step': 19098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:39.653765', 'step': 19098, 'epoch': 3} {'type': 'loss', 'content': 0.10673888772726059, 'timestamp': '2025-10-01 04:43:39.655891', 'step': 19099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:39.709373', 'step': 19099, 'epoch': 3} {'type': 'loss', 'content': 0.09390578418970108, 'timestamp': '2025-10-01 04:43:39.715135', 'step': 19100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:39.768503', 'step': 19100, 'epoch': 3} {'type': 'loss', 'content': 0.03753410279750824, 'timestamp': '2025-10-01 04:43:39.770685', 'step': 19101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:39.824386', 'step': 19101, 'epoch': 3} {'type': 'loss', 'content': 0.103915736079216, 'timestamp': '2025-10-01 04:43:39.826459', 'step': 19102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:39.880704', 'step': 19102, 'epoch': 3} {'type': 'loss', 'content': 0.1246977373957634, 'timestamp': '2025-10-01 04:43:39.882933', 'step': 19103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:39.936044', 'step': 19103, 'epoch': 3} {'type': 'loss', 'content': 0.05184876173734665, 'timestamp': '2025-10-01 04:43:39.942520', 'step': 19104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:39.997341', 'step': 19104, 'epoch': 3} {'type': 'loss', 'content': 0.09278122335672379, 'timestamp': '2025-10-01 04:43:40.009881', 'step': 19105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:40.064420', 'step': 19105, 'epoch': 3} {'type': 'loss', 'content': 0.05428534001111984, 'timestamp': '2025-10-01 04:43:40.066585', 'step': 19106, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-01 04:43:52.843235', 'step': 19106, 'epoch': 3} {'type': 'pplx', 'content': 12027.727023531997, 'timestamp': '2025-10-01 04:43:52.847764', 'step': 19106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:52.901784', 'step': 19106, 'epoch': 3} {'type': 'loss', 'content': 0.06326243281364441, 'timestamp': '2025-10-01 04:43:52.903885', 'step': 19107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:52.957558', 'step': 19107, 'epoch': 3} {'type': 'loss', 'content': 0.05024613067507744, 'timestamp': '2025-10-01 04:43:52.963902', 'step': 19108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:53.016325', 'step': 19108, 'epoch': 3} {'type': 'loss', 'content': 0.0112486956641078, 'timestamp': '2025-10-01 04:43:53.018443', 'step': 19109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:53.071552', 'step': 19109, 'epoch': 3} {'type': 'loss', 'content': 0.09666697680950165, 'timestamp': '2025-10-01 04:43:53.073705', 'step': 19110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:53.128472', 'step': 19110, 'epoch': 3} {'type': 'loss', 'content': 0.07801581919193268, 'timestamp': '2025-10-01 04:43:53.130664', 'step': 19111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:53.183911', 'step': 19111, 'epoch': 3} {'type': 'loss', 'content': 0.09740858525037766, 'timestamp': '2025-10-01 04:43:53.191518', 'step': 19112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:53.247418', 'step': 19112, 'epoch': 3} {'type': 'loss', 'content': 0.09157611429691315, 'timestamp': '2025-10-01 04:43:53.251920', 'step': 19113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:43:53.306568', 'step': 19113, 'epoch': 3} {'type': 'loss', 'content': 0.1332857459783554, 'timestamp': '2025-10-01 04:43:53.308797', 'step': 19114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:53.366004', 'step': 19114, 'epoch': 3} {'type': 'loss', 'content': 0.06025895103812218, 'timestamp': '2025-10-01 04:43:53.368378', 'step': 19115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:53.422316', 'step': 19115, 'epoch': 3} {'type': 'loss', 'content': 0.06438615173101425, 'timestamp': '2025-10-01 04:43:53.428147', 'step': 19116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:53.481198', 'step': 19116, 'epoch': 3} {'type': 'loss', 'content': 0.07230176031589508, 'timestamp': '2025-10-01 04:43:53.484028', 'step': 19117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:53.537549', 'step': 19117, 'epoch': 3} {'type': 'loss', 'content': 0.04645053669810295, 'timestamp': '2025-10-01 04:43:53.539892', 'step': 19118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:53.593344', 'step': 19118, 'epoch': 3} {'type': 'loss', 'content': 0.07229314744472504, 'timestamp': '2025-10-01 04:43:53.595531', 'step': 19119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:53.649379', 'step': 19119, 'epoch': 3} {'type': 'loss', 'content': 0.04838532954454422, 'timestamp': '2025-10-01 04:43:53.655432', 'step': 19120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:53.707968', 'step': 19120, 'epoch': 3} {'type': 'loss', 'content': 0.07430673390626907, 'timestamp': '2025-10-01 04:43:53.710013', 'step': 19121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:53.762985', 'step': 19121, 'epoch': 3} {'type': 'loss', 'content': 0.1299583464860916, 'timestamp': '2025-10-01 04:43:53.765119', 'step': 19122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:53.818165', 'step': 19122, 'epoch': 3} {'type': 'loss', 'content': 0.08876551687717438, 'timestamp': '2025-10-01 04:43:53.820416', 'step': 19123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:53.873623', 'step': 19123, 'epoch': 3} {'type': 'loss', 'content': 0.14132939279079437, 'timestamp': '2025-10-01 04:43:53.879583', 'step': 19124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:53.933009', 'step': 19124, 'epoch': 3} {'type': 'loss', 'content': 0.038887087255716324, 'timestamp': '2025-10-01 04:43:53.935984', 'step': 19125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:53.990579', 'step': 19125, 'epoch': 3} {'type': 'loss', 'content': 0.09658294171094894, 'timestamp': '2025-10-01 04:43:53.993005', 'step': 19126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:54.048393', 'step': 19126, 'epoch': 3} {'type': 'loss', 'content': 0.0895962342619896, 'timestamp': '2025-10-01 04:43:54.050681', 'step': 19127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:54.103727', 'step': 19127, 'epoch': 3} {'type': 'loss', 'content': 0.05254723131656647, 'timestamp': '2025-10-01 04:43:54.109474', 'step': 19128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:54.162780', 'step': 19128, 'epoch': 3} {'type': 'loss', 'content': 0.09480036050081253, 'timestamp': '2025-10-01 04:43:54.164966', 'step': 19129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:54.218186', 'step': 19129, 'epoch': 3} {'type': 'loss', 'content': 0.10658349096775055, 'timestamp': '2025-10-01 04:43:54.220271', 'step': 19130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:54.273437', 'step': 19130, 'epoch': 3} {'type': 'loss', 'content': 0.042261410504579544, 'timestamp': '2025-10-01 04:43:54.275541', 'step': 19131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:54.329344', 'step': 19131, 'epoch': 3} {'type': 'loss', 'content': 0.21848979592323303, 'timestamp': '2025-10-01 04:43:54.335107', 'step': 19132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:54.387524', 'step': 19132, 'epoch': 3} {'type': 'loss', 'content': 0.01955568604171276, 'timestamp': '2025-10-01 04:43:54.389658', 'step': 19133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:54.442692', 'step': 19133, 'epoch': 3} {'type': 'loss', 'content': 0.05888466536998749, 'timestamp': '2025-10-01 04:43:54.444932', 'step': 19134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:54.498054', 'step': 19134, 'epoch': 3} {'type': 'loss', 'content': 0.10399653762578964, 'timestamp': '2025-10-01 04:43:54.500775', 'step': 19135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:54.554176', 'step': 19135, 'epoch': 3} {'type': 'loss', 'content': 0.08283860981464386, 'timestamp': '2025-10-01 04:43:54.560126', 'step': 19136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:54.612991', 'step': 19136, 'epoch': 3} {'type': 'loss', 'content': 0.11511968821287155, 'timestamp': '2025-10-01 04:43:54.615242', 'step': 19137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:54.668330', 'step': 19137, 'epoch': 3} {'type': 'loss', 'content': 0.12272252887487411, 'timestamp': '2025-10-01 04:43:54.670669', 'step': 19138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:54.723900', 'step': 19138, 'epoch': 3} {'type': 'loss', 'content': 0.08821714669466019, 'timestamp': '2025-10-01 04:43:54.726356', 'step': 19139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:43:54.779579', 'step': 19139, 'epoch': 3} {'type': 'loss', 'content': 0.095969058573246, 'timestamp': '2025-10-01 04:43:54.785641', 'step': 19140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:54.838307', 'step': 19140, 'epoch': 3} {'type': 'loss', 'content': 0.06431220471858978, 'timestamp': '2025-10-01 04:43:54.840783', 'step': 19141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:54.894448', 'step': 19141, 'epoch': 3} {'type': 'loss', 'content': 0.042534198611974716, 'timestamp': '2025-10-01 04:43:54.898934', 'step': 19142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:54.952655', 'step': 19142, 'epoch': 3} {'type': 'loss', 'content': 0.10518679022789001, 'timestamp': '2025-10-01 04:43:54.955088', 'step': 19143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:55.008600', 'step': 19143, 'epoch': 3} {'type': 'loss', 'content': 0.08236564695835114, 'timestamp': '2025-10-01 04:43:55.014582', 'step': 19144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:55.067654', 'step': 19144, 'epoch': 3} {'type': 'loss', 'content': 0.05322074517607689, 'timestamp': '2025-10-01 04:43:55.069910', 'step': 19145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:55.123446', 'step': 19145, 'epoch': 3} {'type': 'loss', 'content': 0.13146165013313293, 'timestamp': '2025-10-01 04:43:55.125777', 'step': 19146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:55.179728', 'step': 19146, 'epoch': 3} {'type': 'loss', 'content': 0.15343956649303436, 'timestamp': '2025-10-01 04:43:55.182011', 'step': 19147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:55.236683', 'step': 19147, 'epoch': 3} {'type': 'loss', 'content': 0.13242074847221375, 'timestamp': '2025-10-01 04:43:55.243357', 'step': 19148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:55.297676', 'step': 19148, 'epoch': 3} {'type': 'loss', 'content': 0.07728585600852966, 'timestamp': '2025-10-01 04:43:55.300025', 'step': 19149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:55.354563', 'step': 19149, 'epoch': 3} {'type': 'loss', 'content': 0.05010359734296799, 'timestamp': '2025-10-01 04:43:55.356786', 'step': 19150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:55.411726', 'step': 19150, 'epoch': 3} {'type': 'loss', 'content': 0.0691608265042305, 'timestamp': '2025-10-01 04:43:55.414205', 'step': 19151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:55.469407', 'step': 19151, 'epoch': 3} {'type': 'loss', 'content': 0.08520575612783432, 'timestamp': '2025-10-01 04:43:55.476029', 'step': 19152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:55.530447', 'step': 19152, 'epoch': 3} {'type': 'loss', 'content': 0.07788758724927902, 'timestamp': '2025-10-01 04:43:55.534580', 'step': 19153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:55.593694', 'step': 19153, 'epoch': 3} {'type': 'loss', 'content': 0.13973553478717804, 'timestamp': '2025-10-01 04:43:55.596008', 'step': 19154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:55.651117', 'step': 19154, 'epoch': 3} {'type': 'loss', 'content': 0.17905870079994202, 'timestamp': '2025-10-01 04:43:55.657567', 'step': 19155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:55.716621', 'step': 19155, 'epoch': 3} {'type': 'loss', 'content': 0.15388517081737518, 'timestamp': '2025-10-01 04:43:55.723159', 'step': 19156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:55.776738', 'step': 19156, 'epoch': 3} {'type': 'loss', 'content': 0.09259112924337387, 'timestamp': '2025-10-01 04:43:55.780402', 'step': 19157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:55.833545', 'step': 19157, 'epoch': 3} {'type': 'loss', 'content': 0.10224974900484085, 'timestamp': '2025-10-01 04:43:55.835779', 'step': 19158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:55.888595', 'step': 19158, 'epoch': 3} {'type': 'loss', 'content': 0.04212713614106178, 'timestamp': '2025-10-01 04:43:55.893928', 'step': 19159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:55.947244', 'step': 19159, 'epoch': 3} {'type': 'loss', 'content': 0.012931360863149166, 'timestamp': '2025-10-01 04:43:55.953326', 'step': 19160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:56.005851', 'step': 19160, 'epoch': 3} {'type': 'loss', 'content': 0.03509487956762314, 'timestamp': '2025-10-01 04:43:56.008133', 'step': 19161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:56.061308', 'step': 19161, 'epoch': 3} {'type': 'loss', 'content': 0.07498738914728165, 'timestamp': '2025-10-01 04:43:56.063570', 'step': 19162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:56.117024', 'step': 19162, 'epoch': 3} {'type': 'loss', 'content': 0.14717631042003632, 'timestamp': '2025-10-01 04:43:56.119379', 'step': 19163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:56.177566', 'step': 19163, 'epoch': 3} {'type': 'loss', 'content': 0.11917904764413834, 'timestamp': '2025-10-01 04:43:56.183815', 'step': 19164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:56.236471', 'step': 19164, 'epoch': 3} {'type': 'loss', 'content': 0.05400380119681358, 'timestamp': '2025-10-01 04:43:56.238834', 'step': 19165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:56.292211', 'step': 19165, 'epoch': 3} {'type': 'loss', 'content': 0.08173128217458725, 'timestamp': '2025-10-01 04:43:56.296174', 'step': 19166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:56.350326', 'step': 19166, 'epoch': 3} {'type': 'loss', 'content': 0.18770574033260345, 'timestamp': '2025-10-01 04:43:56.352593', 'step': 19167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:56.406098', 'step': 19167, 'epoch': 3} {'type': 'loss', 'content': 0.09863320738077164, 'timestamp': '2025-10-01 04:43:56.412420', 'step': 19168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:56.466006', 'step': 19168, 'epoch': 3} {'type': 'loss', 'content': 0.17332066595554352, 'timestamp': '2025-10-01 04:43:56.468434', 'step': 19169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:56.522907', 'step': 19169, 'epoch': 3} {'type': 'loss', 'content': 0.16165012121200562, 'timestamp': '2025-10-01 04:43:56.525290', 'step': 19170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:56.578856', 'step': 19170, 'epoch': 3} {'type': 'loss', 'content': 0.08381127566099167, 'timestamp': '2025-10-01 04:43:56.582373', 'step': 19171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:56.635773', 'step': 19171, 'epoch': 3} {'type': 'loss', 'content': 0.08152691274881363, 'timestamp': '2025-10-01 04:43:56.642029', 'step': 19172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:56.696457', 'step': 19172, 'epoch': 3} {'type': 'loss', 'content': 0.0900617390871048, 'timestamp': '2025-10-01 04:43:56.704030', 'step': 19173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:56.761395', 'step': 19173, 'epoch': 3} {'type': 'loss', 'content': 0.12520428001880646, 'timestamp': '2025-10-01 04:43:56.767741', 'step': 19174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:56.826398', 'step': 19174, 'epoch': 3} {'type': 'loss', 'content': 0.11743234843015671, 'timestamp': '2025-10-01 04:43:56.829190', 'step': 19175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:43:56.884889', 'step': 19175, 'epoch': 3} {'type': 'loss', 'content': 0.055593326687812805, 'timestamp': '2025-10-01 04:43:56.891435', 'step': 19176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:43:56.945688', 'step': 19176, 'epoch': 3} {'type': 'loss', 'content': 0.12895938754081726, 'timestamp': '2025-10-01 04:43:56.952932', 'step': 19177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:57.010077', 'step': 19177, 'epoch': 3} {'type': 'loss', 'content': 0.1627805083990097, 'timestamp': '2025-10-01 04:43:57.014760', 'step': 19178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:57.070292', 'step': 19178, 'epoch': 3} {'type': 'loss', 'content': 0.08842490613460541, 'timestamp': '2025-10-01 04:43:57.072697', 'step': 19179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:57.131899', 'step': 19179, 'epoch': 3} {'type': 'loss', 'content': 0.09443049877882004, 'timestamp': '2025-10-01 04:43:57.138355', 'step': 19180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:57.192098', 'step': 19180, 'epoch': 3} {'type': 'loss', 'content': 0.1442822813987732, 'timestamp': '2025-10-01 04:43:57.194523', 'step': 19181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:57.250552', 'step': 19181, 'epoch': 3} {'type': 'loss', 'content': 0.04836191236972809, 'timestamp': '2025-10-01 04:43:57.257318', 'step': 19182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:57.323573', 'step': 19182, 'epoch': 3} {'type': 'loss', 'content': 0.10943588614463806, 'timestamp': '2025-10-01 04:43:57.326081', 'step': 19183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:57.380829', 'step': 19183, 'epoch': 3} {'type': 'loss', 'content': 0.023390034213662148, 'timestamp': '2025-10-01 04:43:57.399915', 'step': 19184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:57.466780', 'step': 19184, 'epoch': 3} {'type': 'loss', 'content': 0.07569411396980286, 'timestamp': '2025-10-01 04:43:57.469110', 'step': 19185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:57.524715', 'step': 19185, 'epoch': 3} {'type': 'loss', 'content': 0.18371932208538055, 'timestamp': '2025-10-01 04:43:57.530175', 'step': 19186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:43:57.590858', 'step': 19186, 'epoch': 3} {'type': 'loss', 'content': 0.045595284551382065, 'timestamp': '2025-10-01 04:43:57.593455', 'step': 19187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:57.646764', 'step': 19187, 'epoch': 3} {'type': 'loss', 'content': 0.13552936911582947, 'timestamp': '2025-10-01 04:43:57.653039', 'step': 19188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:57.713524', 'step': 19188, 'epoch': 3} {'type': 'loss', 'content': 0.1336185485124588, 'timestamp': '2025-10-01 04:43:57.716156', 'step': 19189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:57.774813', 'step': 19189, 'epoch': 3} {'type': 'loss', 'content': 0.06531129032373428, 'timestamp': '2025-10-01 04:43:57.777780', 'step': 19190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:57.838979', 'step': 19190, 'epoch': 3} {'type': 'loss', 'content': 0.07875635474920273, 'timestamp': '2025-10-01 04:43:57.841372', 'step': 19191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:57.894880', 'step': 19191, 'epoch': 3} {'type': 'loss', 'content': 0.11381418257951736, 'timestamp': '2025-10-01 04:43:57.908773', 'step': 19192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:57.976552', 'step': 19192, 'epoch': 3} {'type': 'loss', 'content': 0.12347535043954849, 'timestamp': '2025-10-01 04:43:57.978927', 'step': 19193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:58.033404', 'step': 19193, 'epoch': 3} {'type': 'loss', 'content': 0.14687177538871765, 'timestamp': '2025-10-01 04:43:58.036125', 'step': 19194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:58.091068', 'step': 19194, 'epoch': 3} {'type': 'loss', 'content': 0.09852338582277298, 'timestamp': '2025-10-01 04:43:58.093486', 'step': 19195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:58.147074', 'step': 19195, 'epoch': 3} {'type': 'loss', 'content': 0.08284923434257507, 'timestamp': '2025-10-01 04:43:58.153413', 'step': 19196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:58.206132', 'step': 19196, 'epoch': 3} {'type': 'loss', 'content': 0.06442168354988098, 'timestamp': '2025-10-01 04:43:58.208392', 'step': 19197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:58.262335', 'step': 19197, 'epoch': 3} {'type': 'loss', 'content': 0.031671199947595596, 'timestamp': '2025-10-01 04:43:58.266508', 'step': 19198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:58.325294', 'step': 19198, 'epoch': 3} {'type': 'loss', 'content': 0.06558363884687424, 'timestamp': '2025-10-01 04:43:58.328277', 'step': 19199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:58.382082', 'step': 19199, 'epoch': 3} {'type': 'loss', 'content': 0.20441612601280212, 'timestamp': '2025-10-01 04:43:58.389812', 'step': 19200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:58.442839', 'step': 19200, 'epoch': 3} {'type': 'loss', 'content': 0.0921480804681778, 'timestamp': '2025-10-01 04:43:58.445166', 'step': 19201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:58.499150', 'step': 19201, 'epoch': 3} {'type': 'loss', 'content': 0.03261437639594078, 'timestamp': '2025-10-01 04:43:58.501388', 'step': 19202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:58.555277', 'step': 19202, 'epoch': 3} {'type': 'loss', 'content': 0.08473989367485046, 'timestamp': '2025-10-01 04:43:58.559898', 'step': 19203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:58.613376', 'step': 19203, 'epoch': 3} {'type': 'loss', 'content': 0.09770388901233673, 'timestamp': '2025-10-01 04:43:58.619309', 'step': 19204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:58.677238', 'step': 19204, 'epoch': 3} {'type': 'loss', 'content': 0.07983662188053131, 'timestamp': '2025-10-01 04:43:58.679551', 'step': 19205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:58.734148', 'step': 19205, 'epoch': 3} {'type': 'loss', 'content': 0.10231959074735641, 'timestamp': '2025-10-01 04:43:58.736628', 'step': 19206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:43:58.793436', 'step': 19206, 'epoch': 3} {'type': 'loss', 'content': 0.1485586315393448, 'timestamp': '2025-10-01 04:43:58.796102', 'step': 19207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:58.859261', 'step': 19207, 'epoch': 3} {'type': 'loss', 'content': 0.06807094067335129, 'timestamp': '2025-10-01 04:43:58.865371', 'step': 19208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:58.918556', 'step': 19208, 'epoch': 3} {'type': 'loss', 'content': 0.06402573734521866, 'timestamp': '2025-10-01 04:43:58.921771', 'step': 19209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:58.975361', 'step': 19209, 'epoch': 3} {'type': 'loss', 'content': 0.20363211631774902, 'timestamp': '2025-10-01 04:43:58.977673', 'step': 19210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:43:59.032629', 'step': 19210, 'epoch': 3} {'type': 'loss', 'content': 0.06595185399055481, 'timestamp': '2025-10-01 04:43:59.034720', 'step': 19211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:59.088321', 'step': 19211, 'epoch': 3} {'type': 'loss', 'content': 0.06633000075817108, 'timestamp': '2025-10-01 04:43:59.094461', 'step': 19212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:59.147668', 'step': 19212, 'epoch': 3} {'type': 'loss', 'content': 0.21639905869960785, 'timestamp': '2025-10-01 04:43:59.150641', 'step': 19213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:59.204469', 'step': 19213, 'epoch': 3} {'type': 'loss', 'content': 0.08021645992994308, 'timestamp': '2025-10-01 04:43:59.206878', 'step': 19214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:43:59.263225', 'step': 19214, 'epoch': 3} {'type': 'loss', 'content': 0.08120782673358917, 'timestamp': '2025-10-01 04:43:59.266366', 'step': 19215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:59.320253', 'step': 19215, 'epoch': 3} {'type': 'loss', 'content': 0.09510084241628647, 'timestamp': '2025-10-01 04:43:59.326069', 'step': 19216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:59.379519', 'step': 19216, 'epoch': 3} {'type': 'loss', 'content': 0.09693683683872223, 'timestamp': '2025-10-01 04:43:59.385236', 'step': 19217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:43:59.447378', 'step': 19217, 'epoch': 3} {'type': 'loss', 'content': 0.07959836721420288, 'timestamp': '2025-10-01 04:43:59.450233', 'step': 19218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:59.507852', 'step': 19218, 'epoch': 3} {'type': 'loss', 'content': 0.2066325843334198, 'timestamp': '2025-10-01 04:43:59.511283', 'step': 19219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:59.565911', 'step': 19219, 'epoch': 3} {'type': 'loss', 'content': 0.15137715637683868, 'timestamp': '2025-10-01 04:43:59.573280', 'step': 19220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:43:59.626643', 'step': 19220, 'epoch': 3} {'type': 'loss', 'content': 0.134856715798378, 'timestamp': '2025-10-01 04:43:59.629198', 'step': 19221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:43:59.683379', 'step': 19221, 'epoch': 3} {'type': 'loss', 'content': 0.07714271545410156, 'timestamp': '2025-10-01 04:43:59.685636', 'step': 19222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:59.739358', 'step': 19222, 'epoch': 3} {'type': 'loss', 'content': 0.10365680605173111, 'timestamp': '2025-10-01 04:43:59.747483', 'step': 19223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:59.800641', 'step': 19223, 'epoch': 3} {'type': 'loss', 'content': 0.07735186815261841, 'timestamp': '2025-10-01 04:43:59.806992', 'step': 19224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:43:59.860254', 'step': 19224, 'epoch': 3} {'type': 'loss', 'content': 0.09479610621929169, 'timestamp': '2025-10-01 04:43:59.874827', 'step': 19225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:43:59.928942', 'step': 19225, 'epoch': 3} {'type': 'loss', 'content': 0.04748864471912384, 'timestamp': '2025-10-01 04:43:59.932343', 'step': 19226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:43:59.998161', 'step': 19226, 'epoch': 3} {'type': 'loss', 'content': 0.06445054709911346, 'timestamp': '2025-10-01 04:44:00.000821', 'step': 19227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:44:00.055629', 'step': 19227, 'epoch': 3} {'type': 'loss', 'content': 0.1381930559873581, 'timestamp': '2025-10-01 04:44:00.062095', 'step': 19228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:00.116119', 'step': 19228, 'epoch': 3} {'type': 'loss', 'content': 0.12281119078397751, 'timestamp': '2025-10-01 04:44:00.118737', 'step': 19229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:00.184152', 'step': 19229, 'epoch': 3} {'type': 'loss', 'content': 0.13757357001304626, 'timestamp': '2025-10-01 04:44:00.186988', 'step': 19230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:00.241561', 'step': 19230, 'epoch': 3} {'type': 'loss', 'content': 0.010096803307533264, 'timestamp': '2025-10-01 04:44:00.244160', 'step': 19231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:00.299120', 'step': 19231, 'epoch': 3} {'type': 'loss', 'content': 0.14079885184764862, 'timestamp': '2025-10-01 04:44:00.305496', 'step': 19232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:00.359149', 'step': 19232, 'epoch': 3} {'type': 'loss', 'content': 0.06673373281955719, 'timestamp': '2025-10-01 04:44:00.361472', 'step': 19233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:00.425561', 'step': 19233, 'epoch': 3} {'type': 'loss', 'content': 0.15466876327991486, 'timestamp': '2025-10-01 04:44:00.441111', 'step': 19234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:00.496641', 'step': 19234, 'epoch': 3} {'type': 'loss', 'content': 0.15573489665985107, 'timestamp': '2025-10-01 04:44:00.499143', 'step': 19235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:00.554454', 'step': 19235, 'epoch': 3} {'type': 'loss', 'content': 0.08163480460643768, 'timestamp': '2025-10-01 04:44:00.560726', 'step': 19236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:00.620827', 'step': 19236, 'epoch': 3} {'type': 'loss', 'content': 0.16846667230129242, 'timestamp': '2025-10-01 04:44:00.623180', 'step': 19237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:44:00.682843', 'step': 19237, 'epoch': 3} {'type': 'loss', 'content': 0.0433058887720108, 'timestamp': '2025-10-01 04:44:00.686246', 'step': 19238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:00.743980', 'step': 19238, 'epoch': 3} {'type': 'loss', 'content': 0.08299992978572845, 'timestamp': '2025-10-01 04:44:00.746522', 'step': 19239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:00.800819', 'step': 19239, 'epoch': 3} {'type': 'loss', 'content': 0.09182005375623703, 'timestamp': '2025-10-01 04:44:00.807049', 'step': 19240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:00.869800', 'step': 19240, 'epoch': 3} {'type': 'loss', 'content': 0.13353940844535828, 'timestamp': '2025-10-01 04:44:00.872990', 'step': 19241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:00.927632', 'step': 19241, 'epoch': 3} {'type': 'loss', 'content': 0.10951720178127289, 'timestamp': '2025-10-01 04:44:00.930523', 'step': 19242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:00.984865', 'step': 19242, 'epoch': 3} {'type': 'loss', 'content': 0.13127538561820984, 'timestamp': '2025-10-01 04:44:00.987826', 'step': 19243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:44:01.050387', 'step': 19243, 'epoch': 3} {'type': 'loss', 'content': 0.13361811637878418, 'timestamp': '2025-10-01 04:44:01.056675', 'step': 19244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:01.116766', 'step': 19244, 'epoch': 3} {'type': 'loss', 'content': 0.0861206203699112, 'timestamp': '2025-10-01 04:44:01.119548', 'step': 19245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:01.173495', 'step': 19245, 'epoch': 3} {'type': 'loss', 'content': 0.09894565492868423, 'timestamp': '2025-10-01 04:44:01.176238', 'step': 19246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:01.230798', 'step': 19246, 'epoch': 3} {'type': 'loss', 'content': 0.09879133105278015, 'timestamp': '2025-10-01 04:44:01.233548', 'step': 19247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:01.288461', 'step': 19247, 'epoch': 3} {'type': 'loss', 'content': 0.05965225398540497, 'timestamp': '2025-10-01 04:44:01.294731', 'step': 19248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:01.355286', 'step': 19248, 'epoch': 3} {'type': 'loss', 'content': 0.12104091793298721, 'timestamp': '2025-10-01 04:44:01.357952', 'step': 19249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:01.415573', 'step': 19249, 'epoch': 3} {'type': 'loss', 'content': 0.14612415432929993, 'timestamp': '2025-10-01 04:44:01.418322', 'step': 19250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:01.473304', 'step': 19250, 'epoch': 3} {'type': 'loss', 'content': 0.15485228598117828, 'timestamp': '2025-10-01 04:44:01.475878', 'step': 19251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:01.540072', 'step': 19251, 'epoch': 3} {'type': 'loss', 'content': 0.06047514081001282, 'timestamp': '2025-10-01 04:44:01.546258', 'step': 19252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:01.601190', 'step': 19252, 'epoch': 3} {'type': 'loss', 'content': 0.09716696292161942, 'timestamp': '2025-10-01 04:44:01.603629', 'step': 19253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:01.658435', 'step': 19253, 'epoch': 3} {'type': 'loss', 'content': 0.05503841117024422, 'timestamp': '2025-10-01 04:44:01.660847', 'step': 19254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:01.714276', 'step': 19254, 'epoch': 3} {'type': 'loss', 'content': 0.09133551269769669, 'timestamp': '2025-10-01 04:44:01.716825', 'step': 19255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:01.770841', 'step': 19255, 'epoch': 3} {'type': 'loss', 'content': 0.12454892694950104, 'timestamp': '2025-10-01 04:44:01.776927', 'step': 19256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:01.831669', 'step': 19256, 'epoch': 3} {'type': 'loss', 'content': 0.07930876314640045, 'timestamp': '2025-10-01 04:44:01.837210', 'step': 19257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:01.892847', 'step': 19257, 'epoch': 3} {'type': 'loss', 'content': 0.057862285524606705, 'timestamp': '2025-10-01 04:44:01.897539', 'step': 19258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:01.952394', 'step': 19258, 'epoch': 3} {'type': 'loss', 'content': 0.11866816133260727, 'timestamp': '2025-10-01 04:44:01.954650', 'step': 19259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:02.009133', 'step': 19259, 'epoch': 3} {'type': 'loss', 'content': 0.09657265990972519, 'timestamp': '2025-10-01 04:44:02.024669', 'step': 19260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:02.077551', 'step': 19260, 'epoch': 3} {'type': 'loss', 'content': 0.10289809107780457, 'timestamp': '2025-10-01 04:44:02.079930', 'step': 19261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:02.132987', 'step': 19261, 'epoch': 3} {'type': 'loss', 'content': 0.1212339848279953, 'timestamp': '2025-10-01 04:44:02.135412', 'step': 19262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:02.189087', 'step': 19262, 'epoch': 3} {'type': 'loss', 'content': 0.09129878878593445, 'timestamp': '2025-10-01 04:44:02.191939', 'step': 19263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:02.245383', 'step': 19263, 'epoch': 3} {'type': 'loss', 'content': 0.10621745884418488, 'timestamp': '2025-10-01 04:44:02.251285', 'step': 19264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:02.304452', 'step': 19264, 'epoch': 3} {'type': 'loss', 'content': 0.05779216066002846, 'timestamp': '2025-10-01 04:44:02.306937', 'step': 19265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:02.362360', 'step': 19265, 'epoch': 3} {'type': 'loss', 'content': 0.13447704911231995, 'timestamp': '2025-10-01 04:44:02.365084', 'step': 19266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:02.419504', 'step': 19266, 'epoch': 3} {'type': 'loss', 'content': 0.12569624185562134, 'timestamp': '2025-10-01 04:44:02.421919', 'step': 19267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:02.488949', 'step': 19267, 'epoch': 3} {'type': 'loss', 'content': 0.1152680441737175, 'timestamp': '2025-10-01 04:44:02.494893', 'step': 19268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:02.548353', 'step': 19268, 'epoch': 3} {'type': 'loss', 'content': 0.0754845067858696, 'timestamp': '2025-10-01 04:44:02.550887', 'step': 19269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:02.604349', 'step': 19269, 'epoch': 3} {'type': 'loss', 'content': 0.15754733979701996, 'timestamp': '2025-10-01 04:44:02.614337', 'step': 19270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:02.669151', 'step': 19270, 'epoch': 3} {'type': 'loss', 'content': 0.04973223805427551, 'timestamp': '2025-10-01 04:44:02.671732', 'step': 19271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:02.726284', 'step': 19271, 'epoch': 3} {'type': 'loss', 'content': 0.12015626579523087, 'timestamp': '2025-10-01 04:44:02.733690', 'step': 19272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:02.786616', 'step': 19272, 'epoch': 3} {'type': 'loss', 'content': 0.08713966608047485, 'timestamp': '2025-10-01 04:44:02.789112', 'step': 19273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:02.842620', 'step': 19273, 'epoch': 3} {'type': 'loss', 'content': 0.13460633158683777, 'timestamp': '2025-10-01 04:44:02.845070', 'step': 19274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:02.898277', 'step': 19274, 'epoch': 3} {'type': 'loss', 'content': 0.15891307592391968, 'timestamp': '2025-10-01 04:44:02.901120', 'step': 19275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:02.954676', 'step': 19275, 'epoch': 3} {'type': 'loss', 'content': 0.033303387463092804, 'timestamp': '2025-10-01 04:44:02.960728', 'step': 19276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:03.024459', 'step': 19276, 'epoch': 3} {'type': 'loss', 'content': 0.1342822015285492, 'timestamp': '2025-10-01 04:44:03.027786', 'step': 19277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:03.082952', 'step': 19277, 'epoch': 3} {'type': 'loss', 'content': 0.11305560916662216, 'timestamp': '2025-10-01 04:44:03.086831', 'step': 19278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:03.141445', 'step': 19278, 'epoch': 3} {'type': 'loss', 'content': 0.16284865140914917, 'timestamp': '2025-10-01 04:44:03.144200', 'step': 19279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:03.211583', 'step': 19279, 'epoch': 3} {'type': 'loss', 'content': 0.04072197899222374, 'timestamp': '2025-10-01 04:44:03.218483', 'step': 19280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:03.274094', 'step': 19280, 'epoch': 3} {'type': 'loss', 'content': 0.033730439841747284, 'timestamp': '2025-10-01 04:44:03.276410', 'step': 19281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:03.340223', 'step': 19281, 'epoch': 3} {'type': 'loss', 'content': 0.1111992746591568, 'timestamp': '2025-10-01 04:44:03.342786', 'step': 19282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:03.397538', 'step': 19282, 'epoch': 3} {'type': 'loss', 'content': 0.03411204740405083, 'timestamp': '2025-10-01 04:44:03.400295', 'step': 19283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:44:03.454506', 'step': 19283, 'epoch': 3} {'type': 'loss', 'content': 0.09250408411026001, 'timestamp': '2025-10-01 04:44:03.460666', 'step': 19284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:03.514390', 'step': 19284, 'epoch': 3} {'type': 'loss', 'content': 0.08674634993076324, 'timestamp': '2025-10-01 04:44:03.517306', 'step': 19285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:03.571726', 'step': 19285, 'epoch': 3} {'type': 'loss', 'content': 0.13840559124946594, 'timestamp': '2025-10-01 04:44:03.576399', 'step': 19286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:03.632248', 'step': 19286, 'epoch': 3} {'type': 'loss', 'content': 0.07311493903398514, 'timestamp': '2025-10-01 04:44:03.634946', 'step': 19287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:03.689181', 'step': 19287, 'epoch': 3} {'type': 'loss', 'content': 0.1639147400856018, 'timestamp': '2025-10-01 04:44:03.695310', 'step': 19288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:03.748195', 'step': 19288, 'epoch': 3} {'type': 'loss', 'content': 0.07275251299142838, 'timestamp': '2025-10-01 04:44:03.751160', 'step': 19289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:03.804783', 'step': 19289, 'epoch': 3} {'type': 'loss', 'content': 0.19594264030456543, 'timestamp': '2025-10-01 04:44:03.807341', 'step': 19290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:03.860922', 'step': 19290, 'epoch': 3} {'type': 'loss', 'content': 0.0971146821975708, 'timestamp': '2025-10-01 04:44:03.863321', 'step': 19291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:03.917401', 'step': 19291, 'epoch': 3} {'type': 'loss', 'content': 0.16003437340259552, 'timestamp': '2025-10-01 04:44:03.923637', 'step': 19292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:03.976340', 'step': 19292, 'epoch': 3} {'type': 'loss', 'content': 0.07346747815608978, 'timestamp': '2025-10-01 04:44:03.978761', 'step': 19293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:04.032196', 'step': 19293, 'epoch': 3} {'type': 'loss', 'content': 0.1877981424331665, 'timestamp': '2025-10-01 04:44:04.034514', 'step': 19294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:04.087407', 'step': 19294, 'epoch': 3} {'type': 'loss', 'content': 0.14024469256401062, 'timestamp': '2025-10-01 04:44:04.089786', 'step': 19295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:04.143109', 'step': 19295, 'epoch': 3} {'type': 'loss', 'content': 0.06581477075815201, 'timestamp': '2025-10-01 04:44:04.149105', 'step': 19296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:44:04.203200', 'step': 19296, 'epoch': 3} {'type': 'loss', 'content': 0.04628321900963783, 'timestamp': '2025-10-01 04:44:04.206910', 'step': 19297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:04.262164', 'step': 19297, 'epoch': 3} {'type': 'loss', 'content': 0.05821505934000015, 'timestamp': '2025-10-01 04:44:04.265229', 'step': 19298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:04.319529', 'step': 19298, 'epoch': 3} {'type': 'loss', 'content': 0.11221688240766525, 'timestamp': '2025-10-01 04:44:04.322218', 'step': 19299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:04.376821', 'step': 19299, 'epoch': 3} {'type': 'loss', 'content': 0.0986744835972786, 'timestamp': '2025-10-01 04:44:04.382986', 'step': 19300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:04.435950', 'step': 19300, 'epoch': 3} {'type': 'loss', 'content': 0.16713310778141022, 'timestamp': '2025-10-01 04:44:04.438366', 'step': 19301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:04.491987', 'step': 19301, 'epoch': 3} {'type': 'loss', 'content': 0.14655114710330963, 'timestamp': '2025-10-01 04:44:04.494484', 'step': 19302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:04.548233', 'step': 19302, 'epoch': 3} {'type': 'loss', 'content': 0.06293001025915146, 'timestamp': '2025-10-01 04:44:04.550757', 'step': 19303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:04.604142', 'step': 19303, 'epoch': 3} {'type': 'loss', 'content': 0.040952183306217194, 'timestamp': '2025-10-01 04:44:04.611048', 'step': 19304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:04.676773', 'step': 19304, 'epoch': 3} {'type': 'loss', 'content': 0.13857698440551758, 'timestamp': '2025-10-01 04:44:04.680339', 'step': 19305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:04.734714', 'step': 19305, 'epoch': 3} {'type': 'loss', 'content': 0.09540526568889618, 'timestamp': '2025-10-01 04:44:04.741304', 'step': 19306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:04.795236', 'step': 19306, 'epoch': 3} {'type': 'loss', 'content': 0.1177663654088974, 'timestamp': '2025-10-01 04:44:04.797883', 'step': 19307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:04.851465', 'step': 19307, 'epoch': 3} {'type': 'loss', 'content': 0.03198953717947006, 'timestamp': '2025-10-01 04:44:04.859121', 'step': 19308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:04.911948', 'step': 19308, 'epoch': 3} {'type': 'loss', 'content': 0.1387823224067688, 'timestamp': '2025-10-01 04:44:04.914254', 'step': 19309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:04.970201', 'step': 19309, 'epoch': 3} {'type': 'loss', 'content': 0.06172816827893257, 'timestamp': '2025-10-01 04:44:04.973234', 'step': 19310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:05.027822', 'step': 19310, 'epoch': 3} {'type': 'loss', 'content': 0.11753661185503006, 'timestamp': '2025-10-01 04:44:05.030237', 'step': 19311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:05.085167', 'step': 19311, 'epoch': 3} {'type': 'loss', 'content': 0.10015171766281128, 'timestamp': '2025-10-01 04:44:05.093758', 'step': 19312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:05.155993', 'step': 19312, 'epoch': 3} {'type': 'loss', 'content': 0.04115019738674164, 'timestamp': '2025-10-01 04:44:05.162105', 'step': 19313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:05.216088', 'step': 19313, 'epoch': 3} {'type': 'loss', 'content': 0.15722502768039703, 'timestamp': '2025-10-01 04:44:05.218790', 'step': 19314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:05.272619', 'step': 19314, 'epoch': 3} {'type': 'loss', 'content': 0.08737200498580933, 'timestamp': '2025-10-01 04:44:05.275162', 'step': 19315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:05.328342', 'step': 19315, 'epoch': 3} {'type': 'loss', 'content': 0.15725818276405334, 'timestamp': '2025-10-01 04:44:05.334295', 'step': 19316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:05.386374', 'step': 19316, 'epoch': 3} {'type': 'loss', 'content': 0.07984963059425354, 'timestamp': '2025-10-01 04:44:05.388907', 'step': 19317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:05.442252', 'step': 19317, 'epoch': 3} {'type': 'loss', 'content': 0.09696877747774124, 'timestamp': '2025-10-01 04:44:05.444499', 'step': 19318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:05.498086', 'step': 19318, 'epoch': 3} {'type': 'loss', 'content': 0.08454073965549469, 'timestamp': '2025-10-01 04:44:05.500436', 'step': 19319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:05.553554', 'step': 19319, 'epoch': 3} {'type': 'loss', 'content': 0.14693433046340942, 'timestamp': '2025-10-01 04:44:05.559622', 'step': 19320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:05.614484', 'step': 19320, 'epoch': 3} {'type': 'loss', 'content': 0.09745355695486069, 'timestamp': '2025-10-01 04:44:05.617357', 'step': 19321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:05.670354', 'step': 19321, 'epoch': 3} {'type': 'loss', 'content': 0.18859055638313293, 'timestamp': '2025-10-01 04:44:05.672582', 'step': 19322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:44:05.727251', 'step': 19322, 'epoch': 3} {'type': 'loss', 'content': 0.07484276592731476, 'timestamp': '2025-10-01 04:44:05.729675', 'step': 19323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:05.788546', 'step': 19323, 'epoch': 3} {'type': 'loss', 'content': 0.07902738451957703, 'timestamp': '2025-10-01 04:44:05.797130', 'step': 19324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:05.849834', 'step': 19324, 'epoch': 3} {'type': 'loss', 'content': 0.03880104422569275, 'timestamp': '2025-10-01 04:44:05.852155', 'step': 19325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:05.905709', 'step': 19325, 'epoch': 3} {'type': 'loss', 'content': 0.12251795828342438, 'timestamp': '2025-10-01 04:44:05.907856', 'step': 19326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:05.961855', 'step': 19326, 'epoch': 3} {'type': 'loss', 'content': 0.07388316094875336, 'timestamp': '2025-10-01 04:44:05.964504', 'step': 19327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:06.018399', 'step': 19327, 'epoch': 3} {'type': 'loss', 'content': 0.09379637986421585, 'timestamp': '2025-10-01 04:44:06.028772', 'step': 19328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:06.086948', 'step': 19328, 'epoch': 3} {'type': 'loss', 'content': 0.06875410676002502, 'timestamp': '2025-10-01 04:44:06.092197', 'step': 19329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:06.145841', 'step': 19329, 'epoch': 3} {'type': 'loss', 'content': 0.05924037843942642, 'timestamp': '2025-10-01 04:44:06.165148', 'step': 19330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:44:06.227327', 'step': 19330, 'epoch': 3} {'type': 'loss', 'content': 0.1278340369462967, 'timestamp': '2025-10-01 04:44:06.230764', 'step': 19331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:44:06.296491', 'step': 19331, 'epoch': 3} {'type': 'loss', 'content': 0.07336043566465378, 'timestamp': '2025-10-01 04:44:06.302212', 'step': 19332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:44:06.354413', 'step': 19332, 'epoch': 3} {'type': 'loss', 'content': 0.1223035603761673, 'timestamp': '2025-10-01 04:44:06.357796', 'step': 19333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:06.410581', 'step': 19333, 'epoch': 3} {'type': 'loss', 'content': 0.12637723982334137, 'timestamp': '2025-10-01 04:44:06.412762', 'step': 19334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:06.466716', 'step': 19334, 'epoch': 3} {'type': 'loss', 'content': 0.022172462195158005, 'timestamp': '2025-10-01 04:44:06.469217', 'step': 19335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:06.525395', 'step': 19335, 'epoch': 3} {'type': 'loss', 'content': 0.08749192953109741, 'timestamp': '2025-10-01 04:44:06.531099', 'step': 19336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:06.583174', 'step': 19336, 'epoch': 3} {'type': 'loss', 'content': 0.07245614379644394, 'timestamp': '2025-10-01 04:44:06.585578', 'step': 19337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:06.639499', 'step': 19337, 'epoch': 3} {'type': 'loss', 'content': 0.13016639649868011, 'timestamp': '2025-10-01 04:44:06.650772', 'step': 19338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:06.705041', 'step': 19338, 'epoch': 3} {'type': 'loss', 'content': 0.0894409790635109, 'timestamp': '2025-10-01 04:44:06.707107', 'step': 19339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:06.760237', 'step': 19339, 'epoch': 3} {'type': 'loss', 'content': 0.1383403241634369, 'timestamp': '2025-10-01 04:44:06.766037', 'step': 19340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:06.819969', 'step': 19340, 'epoch': 3} {'type': 'loss', 'content': 0.0848526805639267, 'timestamp': '2025-10-01 04:44:06.824095', 'step': 19341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:44:06.878452', 'step': 19341, 'epoch': 3} {'type': 'loss', 'content': 0.10805948078632355, 'timestamp': '2025-10-01 04:44:06.880937', 'step': 19342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:06.934016', 'step': 19342, 'epoch': 3} {'type': 'loss', 'content': 0.1267092376947403, 'timestamp': '2025-10-01 04:44:06.936524', 'step': 19343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:06.996974', 'step': 19343, 'epoch': 3} {'type': 'loss', 'content': 0.10993022471666336, 'timestamp': '2025-10-01 04:44:07.002849', 'step': 19344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:07.055746', 'step': 19344, 'epoch': 3} {'type': 'loss', 'content': 0.07141772657632828, 'timestamp': '2025-10-01 04:44:07.060181', 'step': 19345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:07.121483', 'step': 19345, 'epoch': 3} {'type': 'loss', 'content': 0.15497063100337982, 'timestamp': '2025-10-01 04:44:07.132333', 'step': 19346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:07.185436', 'step': 19346, 'epoch': 3} {'type': 'loss', 'content': 0.08060622215270996, 'timestamp': '2025-10-01 04:44:07.188272', 'step': 19347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:07.241401', 'step': 19347, 'epoch': 3} {'type': 'loss', 'content': 0.04584304615855217, 'timestamp': '2025-10-01 04:44:07.247387', 'step': 19348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:07.299819', 'step': 19348, 'epoch': 3} {'type': 'loss', 'content': 0.12458155304193497, 'timestamp': '2025-10-01 04:44:07.307151', 'step': 19349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:07.371214', 'step': 19349, 'epoch': 3} {'type': 'loss', 'content': 0.07647562772035599, 'timestamp': '2025-10-01 04:44:07.374601', 'step': 19350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:07.428021', 'step': 19350, 'epoch': 3} {'type': 'loss', 'content': 0.08361400663852692, 'timestamp': '2025-10-01 04:44:07.430087', 'step': 19351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:07.486782', 'step': 19351, 'epoch': 3} {'type': 'loss', 'content': 0.13533221185207367, 'timestamp': '2025-10-01 04:44:07.492596', 'step': 19352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:07.545793', 'step': 19352, 'epoch': 3} {'type': 'loss', 'content': 0.08218833059072495, 'timestamp': '2025-10-01 04:44:07.547642', 'step': 19353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:07.605438', 'step': 19353, 'epoch': 3} {'type': 'loss', 'content': 0.1176072433590889, 'timestamp': '2025-10-01 04:44:07.607625', 'step': 19354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:07.662203', 'step': 19354, 'epoch': 3} {'type': 'loss', 'content': 0.17094585299491882, 'timestamp': '2025-10-01 04:44:07.664703', 'step': 19355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:07.719117', 'step': 19355, 'epoch': 3} {'type': 'loss', 'content': 0.04237724095582962, 'timestamp': '2025-10-01 04:44:07.724865', 'step': 19356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:07.778720', 'step': 19356, 'epoch': 3} {'type': 'loss', 'content': 0.15591728687286377, 'timestamp': '2025-10-01 04:44:07.781010', 'step': 19357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:07.833986', 'step': 19357, 'epoch': 3} {'type': 'loss', 'content': 0.05558755621314049, 'timestamp': '2025-10-01 04:44:07.836576', 'step': 19358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:07.897596', 'step': 19358, 'epoch': 3} {'type': 'loss', 'content': 0.0870051309466362, 'timestamp': '2025-10-01 04:44:07.899774', 'step': 19359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:07.953415', 'step': 19359, 'epoch': 3} {'type': 'loss', 'content': 0.07156853377819061, 'timestamp': '2025-10-01 04:44:07.958904', 'step': 19360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:08.013195', 'step': 19360, 'epoch': 3} {'type': 'loss', 'content': 0.15346047282218933, 'timestamp': '2025-10-01 04:44:08.015818', 'step': 19361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:08.069323', 'step': 19361, 'epoch': 3} {'type': 'loss', 'content': 0.15266957879066467, 'timestamp': '2025-10-01 04:44:08.071357', 'step': 19362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:08.125464', 'step': 19362, 'epoch': 3} {'type': 'loss', 'content': 0.10240142792463303, 'timestamp': '2025-10-01 04:44:08.127709', 'step': 19363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:08.181010', 'step': 19363, 'epoch': 3} {'type': 'loss', 'content': 0.1200287863612175, 'timestamp': '2025-10-01 04:44:08.186800', 'step': 19364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:08.241139', 'step': 19364, 'epoch': 3} {'type': 'loss', 'content': 0.09500943124294281, 'timestamp': '2025-10-01 04:44:08.243464', 'step': 19365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:08.296736', 'step': 19365, 'epoch': 3} {'type': 'loss', 'content': 0.06409069150686264, 'timestamp': '2025-10-01 04:44:08.298976', 'step': 19366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:08.352879', 'step': 19366, 'epoch': 3} {'type': 'loss', 'content': 0.05413036048412323, 'timestamp': '2025-10-01 04:44:08.355352', 'step': 19367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:08.412277', 'step': 19367, 'epoch': 3} {'type': 'loss', 'content': 0.10681421309709549, 'timestamp': '2025-10-01 04:44:08.418282', 'step': 19368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:08.471394', 'step': 19368, 'epoch': 3} {'type': 'loss', 'content': 0.047230374068021774, 'timestamp': '2025-10-01 04:44:08.473872', 'step': 19369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:44:08.527182', 'step': 19369, 'epoch': 3} {'type': 'loss', 'content': 0.12305886298418045, 'timestamp': '2025-10-01 04:44:08.529580', 'step': 19370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:08.583559', 'step': 19370, 'epoch': 3} {'type': 'loss', 'content': 0.10971105098724365, 'timestamp': '2025-10-01 04:44:08.586493', 'step': 19371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:44:08.640904', 'step': 19371, 'epoch': 3} {'type': 'loss', 'content': 0.08260554820299149, 'timestamp': '2025-10-01 04:44:08.647120', 'step': 19372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:08.700500', 'step': 19372, 'epoch': 3} {'type': 'loss', 'content': 0.07557018846273422, 'timestamp': '2025-10-01 04:44:08.702842', 'step': 19373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:08.756469', 'step': 19373, 'epoch': 3} {'type': 'loss', 'content': 0.08821026980876923, 'timestamp': '2025-10-01 04:44:08.758777', 'step': 19374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:08.820315', 'step': 19374, 'epoch': 3} {'type': 'loss', 'content': 0.1853318214416504, 'timestamp': '2025-10-01 04:44:08.822350', 'step': 19375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:08.878223', 'step': 19375, 'epoch': 3} {'type': 'loss', 'content': 0.05363951250910759, 'timestamp': '2025-10-01 04:44:08.884237', 'step': 19376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:08.946526', 'step': 19376, 'epoch': 3} {'type': 'loss', 'content': 0.0710202008485794, 'timestamp': '2025-10-01 04:44:08.948804', 'step': 19377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:09.002739', 'step': 19377, 'epoch': 3} {'type': 'loss', 'content': 0.08677074313163757, 'timestamp': '2025-10-01 04:44:09.006288', 'step': 19378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:09.060723', 'step': 19378, 'epoch': 3} {'type': 'loss', 'content': 0.061113204807043076, 'timestamp': '2025-10-01 04:44:09.063770', 'step': 19379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:09.119130', 'step': 19379, 'epoch': 3} {'type': 'loss', 'content': 0.11942863464355469, 'timestamp': '2025-10-01 04:44:09.125663', 'step': 19380, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:09.179685', 'step': 19380, 'epoch': 3} {'type': 'loss', 'content': 0.11949465423822403, 'timestamp': '2025-10-01 04:44:09.183234', 'step': 19381, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:09.237825', 'step': 19381, 'epoch': 3} {'type': 'loss', 'content': 0.07363872230052948, 'timestamp': '2025-10-01 04:44:09.240393', 'step': 19382, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:09.295188', 'step': 19382, 'epoch': 3} {'type': 'loss', 'content': 0.12719927728176117, 'timestamp': '2025-10-01 04:44:09.297678', 'step': 19383, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:09.351728', 'step': 19383, 'epoch': 3} {'type': 'loss', 'content': 0.12092892825603485, 'timestamp': '2025-10-01 04:44:09.358659', 'step': 19384, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:09.412853', 'step': 19384, 'epoch': 3} {'type': 'loss', 'content': 0.037968385964632034, 'timestamp': '2025-10-01 04:44:09.415804', 'step': 19385, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:09.470297', 'step': 19385, 'epoch': 3} {'type': 'loss', 'content': 0.11715095490217209, 'timestamp': '2025-10-01 04:44:09.472823', 'step': 19386, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:09.526696', 'step': 19386, 'epoch': 3} {'type': 'loss', 'content': 0.07039718329906464, 'timestamp': '2025-10-01 04:44:09.529167', 'step': 19387, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-01 04:44:09.583988', 'step': 19387, 'epoch': 3} {'type': 'loss', 'content': 0.06722746789455414, 'timestamp': '2025-10-01 04:44:09.590081', 'step': 19388, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:09.643785', 'step': 19388, 'epoch': 3} {'type': 'loss', 'content': 0.040068238973617554, 'timestamp': '2025-10-01 04:44:09.646211', 'step': 19389, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:09.700229', 'step': 19389, 'epoch': 3} {'type': 'loss', 'content': 0.0677018016576767, 'timestamp': '2025-10-01 04:44:09.702882', 'step': 19390, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:09.756799', 'step': 19390, 'epoch': 3} {'type': 'loss', 'content': 0.09022148698568344, 'timestamp': '2025-10-01 04:44:09.759251', 'step': 19391, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:09.813387', 'step': 19391, 'epoch': 3} {'type': 'loss', 'content': 0.06513658165931702, 'timestamp': '2025-10-01 04:44:09.819252', 'step': 19392, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:09.873262', 'step': 19392, 'epoch': 3} {'type': 'loss', 'content': 0.15824903547763824, 'timestamp': '2025-10-01 04:44:09.875683', 'step': 19393, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:44:09.929836', 'step': 19393, 'epoch': 3} {'type': 'loss', 'content': 0.05060786008834839, 'timestamp': '2025-10-01 04:44:09.932384', 'step': 19394, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:09.986619', 'step': 19394, 'epoch': 3} {'type': 'loss', 'content': 0.11440785974264145, 'timestamp': '2025-10-01 04:44:09.988995', 'step': 19395, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:10.044455', 'step': 19395, 'epoch': 3} {'type': 'loss', 'content': 0.06986915320158005, 'timestamp': '2025-10-01 04:44:10.050501', 'step': 19396, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:10.114284', 'step': 19396, 'epoch': 3} {'type': 'loss', 'content': 0.0629071444272995, 'timestamp': '2025-10-01 04:44:10.116466', 'step': 19397, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:10.170196', 'step': 19397, 'epoch': 3} {'type': 'loss', 'content': 0.058030761778354645, 'timestamp': '2025-10-01 04:44:10.172885', 'step': 19398, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:10.227330', 'step': 19398, 'epoch': 3} {'type': 'loss', 'content': 0.046710021793842316, 'timestamp': '2025-10-01 04:44:10.229922', 'step': 19399, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:10.284922', 'step': 19399, 'epoch': 3} {'type': 'loss', 'content': 0.09840737283229828, 'timestamp': '2025-10-01 04:44:10.300505', 'step': 19400, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:10.359871', 'step': 19400, 'epoch': 3} {'type': 'loss', 'content': 0.11248873174190521, 'timestamp': '2025-10-01 04:44:10.362443', 'step': 19401, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:10.418601', 'step': 19401, 'epoch': 3} {'type': 'loss', 'content': 0.039877165108919144, 'timestamp': '2025-10-01 04:44:10.421007', 'step': 19402, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:10.477736', 'step': 19402, 'epoch': 3} {'type': 'loss', 'content': 0.07166462391614914, 'timestamp': '2025-10-01 04:44:10.481280', 'step': 19403, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:10.536441', 'step': 19403, 'epoch': 3} {'type': 'loss', 'content': 0.12142874300479889, 'timestamp': '2025-10-01 04:44:10.542729', 'step': 19404, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:10.603408', 'step': 19404, 'epoch': 3} {'type': 'loss', 'content': 0.031586598604917526, 'timestamp': '2025-10-01 04:44:10.606071', 'step': 19405, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:10.660562', 'step': 19405, 'epoch': 3} {'type': 'loss', 'content': 0.08549930155277252, 'timestamp': '2025-10-01 04:44:10.662969', 'step': 19406, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:10.718149', 'step': 19406, 'epoch': 3} {'type': 'loss', 'content': 0.10842958092689514, 'timestamp': '2025-10-01 04:44:10.720986', 'step': 19407, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:10.776365', 'step': 19407, 'epoch': 3} {'type': 'loss', 'content': 0.06477337330579758, 'timestamp': '2025-10-01 04:44:10.782839', 'step': 19408, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:10.840568', 'step': 19408, 'epoch': 3} {'type': 'loss', 'content': 0.06357815116643906, 'timestamp': '2025-10-01 04:44:10.842263', 'step': 19409, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:44:10.896189', 'step': 19409, 'epoch': 3} {'type': 'loss', 'content': 0.0993538424372673, 'timestamp': '2025-10-01 04:44:10.898135', 'step': 19410, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:10.955230', 'step': 19410, 'epoch': 3} {'type': 'loss', 'content': 0.06386899203062057, 'timestamp': '2025-10-01 04:44:10.961573', 'step': 19411, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:11.018495', 'step': 19411, 'epoch': 3} {'type': 'loss', 'content': 0.03703363612294197, 'timestamp': '2025-10-01 04:44:11.033151', 'step': 19412, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:11.086901', 'step': 19412, 'epoch': 3} {'type': 'loss', 'content': 0.09163279086351395, 'timestamp': '2025-10-01 04:44:11.089179', 'step': 19413, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:11.142261', 'step': 19413, 'epoch': 3} {'type': 'loss', 'content': 0.052997712045907974, 'timestamp': '2025-10-01 04:44:11.144535', 'step': 19414, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:11.197837', 'step': 19414, 'epoch': 3} {'type': 'loss', 'content': 0.09046275168657303, 'timestamp': '2025-10-01 04:44:11.200029', 'step': 19415, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:11.259611', 'step': 19415, 'epoch': 3} {'type': 'loss', 'content': 0.06919436901807785, 'timestamp': '2025-10-01 04:44:11.265563', 'step': 19416, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:11.319032', 'step': 19416, 'epoch': 3} {'type': 'loss', 'content': 0.13756151497364044, 'timestamp': '2025-10-01 04:44:11.321035', 'step': 19417, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:11.374600', 'step': 19417, 'epoch': 3} {'type': 'loss', 'content': 0.12297208607196808, 'timestamp': '2025-10-01 04:44:11.376911', 'step': 19418, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:11.444327', 'step': 19418, 'epoch': 3} {'type': 'loss', 'content': 0.1621444970369339, 'timestamp': '2025-10-01 04:44:11.446595', 'step': 19419, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:11.513395', 'step': 19419, 'epoch': 3} {'type': 'loss', 'content': 0.06319643557071686, 'timestamp': '2025-10-01 04:44:11.522437', 'step': 19420, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:11.575799', 'step': 19420, 'epoch': 3} {'type': 'loss', 'content': 0.15822920203208923, 'timestamp': '2025-10-01 04:44:11.578302', 'step': 19421, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:11.632647', 'step': 19421, 'epoch': 3} {'type': 'loss', 'content': 0.05628427118062973, 'timestamp': '2025-10-01 04:44:11.635127', 'step': 19422, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:11.689271', 'step': 19422, 'epoch': 3} {'type': 'loss', 'content': 0.14796346426010132, 'timestamp': '2025-10-01 04:44:11.691723', 'step': 19423, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:11.756013', 'step': 19423, 'epoch': 3} {'type': 'loss', 'content': 0.07764419913291931, 'timestamp': '2025-10-01 04:44:11.762023', 'step': 19424, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:11.814976', 'step': 19424, 'epoch': 3} {'type': 'loss', 'content': 0.12121404707431793, 'timestamp': '2025-10-01 04:44:11.817098', 'step': 19425, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:11.871952', 'step': 19425, 'epoch': 3} {'type': 'loss', 'content': 0.058802466839551926, 'timestamp': '2025-10-01 04:44:11.874073', 'step': 19426, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:11.932678', 'step': 19426, 'epoch': 3} {'type': 'loss', 'content': 0.11503101140260696, 'timestamp': '2025-10-01 04:44:11.935556', 'step': 19427, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:11.996952', 'step': 19427, 'epoch': 3} {'type': 'loss', 'content': 0.13215814530849457, 'timestamp': '2025-10-01 04:44:12.004185', 'step': 19428, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:12.063286', 'step': 19428, 'epoch': 3} {'type': 'loss', 'content': 0.07045930624008179, 'timestamp': '2025-10-01 04:44:12.071622', 'step': 19429, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:12.133540', 'step': 19429, 'epoch': 3} {'type': 'loss', 'content': 0.16690243780612946, 'timestamp': '2025-10-01 04:44:12.136228', 'step': 19430, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:12.197217', 'step': 19430, 'epoch': 3} {'type': 'loss', 'content': 0.08963149040937424, 'timestamp': '2025-10-01 04:44:12.199390', 'step': 19431, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:44:12.258499', 'step': 19431, 'epoch': 3} {'type': 'loss', 'content': 0.10155849903821945, 'timestamp': '2025-10-01 04:44:12.265437', 'step': 19432, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:12.323658', 'step': 19432, 'epoch': 3} {'type': 'loss', 'content': 0.120704285800457, 'timestamp': '2025-10-01 04:44:12.326119', 'step': 19433, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:12.383794', 'step': 19433, 'epoch': 3} {'type': 'loss', 'content': 0.1533903181552887, 'timestamp': '2025-10-01 04:44:12.386187', 'step': 19434, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:12.449490', 'step': 19434, 'epoch': 3} {'type': 'loss', 'content': 0.10931605845689774, 'timestamp': '2025-10-01 04:44:12.451669', 'step': 19435, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:12.508299', 'step': 19435, 'epoch': 3} {'type': 'loss', 'content': 0.09324678778648376, 'timestamp': '2025-10-01 04:44:12.514834', 'step': 19436, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:12.572656', 'step': 19436, 'epoch': 3} {'type': 'loss', 'content': 0.08224615454673767, 'timestamp': '2025-10-01 04:44:12.575839', 'step': 19437, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:12.635418', 'step': 19437, 'epoch': 3} {'type': 'loss', 'content': 0.09247523546218872, 'timestamp': '2025-10-01 04:44:12.637545', 'step': 19438, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:12.691976', 'step': 19438, 'epoch': 3} {'type': 'loss', 'content': 0.09438258409500122, 'timestamp': '2025-10-01 04:44:12.694439', 'step': 19439, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:12.748364', 'step': 19439, 'epoch': 3} {'type': 'loss', 'content': 0.08346573263406754, 'timestamp': '2025-10-01 04:44:12.763116', 'step': 19440, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:12.821791', 'step': 19440, 'epoch': 3} {'type': 'loss', 'content': 0.0783771276473999, 'timestamp': '2025-10-01 04:44:12.823933', 'step': 19441, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:12.877531', 'step': 19441, 'epoch': 3} {'type': 'loss', 'content': 0.14498348534107208, 'timestamp': '2025-10-01 04:44:12.879797', 'step': 19442, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:12.934582', 'step': 19442, 'epoch': 3} {'type': 'loss', 'content': 0.11383822560310364, 'timestamp': '2025-10-01 04:44:12.939371', 'step': 19443, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:12.993310', 'step': 19443, 'epoch': 3} {'type': 'loss', 'content': 0.04935521259903908, 'timestamp': '2025-10-01 04:44:12.999361', 'step': 19444, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:44:13.059871', 'step': 19444, 'epoch': 3} {'type': 'loss', 'content': 0.036827586591243744, 'timestamp': '2025-10-01 04:44:13.062173', 'step': 19445, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:13.115476', 'step': 19445, 'epoch': 3} {'type': 'loss', 'content': 0.08169647306203842, 'timestamp': '2025-10-01 04:44:13.117617', 'step': 19446, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:13.171086', 'step': 19446, 'epoch': 3} {'type': 'loss', 'content': 0.1084841638803482, 'timestamp': '2025-10-01 04:44:13.173186', 'step': 19447, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:44:13.227124', 'step': 19447, 'epoch': 3} {'type': 'loss', 'content': 0.04765498638153076, 'timestamp': '2025-10-01 04:44:13.243300', 'step': 19448, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:13.297330', 'step': 19448, 'epoch': 3} {'type': 'loss', 'content': 0.13792189955711365, 'timestamp': '2025-10-01 04:44:13.299725', 'step': 19449, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:13.353482', 'step': 19449, 'epoch': 3} {'type': 'loss', 'content': 0.09274400025606155, 'timestamp': '2025-10-01 04:44:13.357849', 'step': 19450, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:13.411451', 'step': 19450, 'epoch': 3} {'type': 'loss', 'content': 0.08052418380975723, 'timestamp': '2025-10-01 04:44:13.413562', 'step': 19451, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:44:13.466545', 'step': 19451, 'epoch': 3} {'type': 'loss', 'content': 0.0648641586303711, 'timestamp': '2025-10-01 04:44:13.472486', 'step': 19452, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:13.525101', 'step': 19452, 'epoch': 3} {'type': 'loss', 'content': 0.06411021202802658, 'timestamp': '2025-10-01 04:44:13.527177', 'step': 19453, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:13.580959', 'step': 19453, 'epoch': 3} {'type': 'loss', 'content': 0.18087905645370483, 'timestamp': '2025-10-01 04:44:13.583203', 'step': 19454, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:13.636988', 'step': 19454, 'epoch': 3} {'type': 'loss', 'content': 0.08347131311893463, 'timestamp': '2025-10-01 04:44:13.639163', 'step': 19455, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:13.694258', 'step': 19455, 'epoch': 3} {'type': 'loss', 'content': 0.10337518155574799, 'timestamp': '2025-10-01 04:44:13.700433', 'step': 19456, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:13.754393', 'step': 19456, 'epoch': 3} {'type': 'loss', 'content': 0.07239865511655807, 'timestamp': '2025-10-01 04:44:13.756887', 'step': 19457, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:13.809946', 'step': 19457, 'epoch': 3} {'type': 'loss', 'content': 0.12816011905670166, 'timestamp': '2025-10-01 04:44:13.812133', 'step': 19458, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:13.865467', 'step': 19458, 'epoch': 3} {'type': 'loss', 'content': 0.05612054094672203, 'timestamp': '2025-10-01 04:44:13.867702', 'step': 19459, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:13.920874', 'step': 19459, 'epoch': 3} {'type': 'loss', 'content': 0.08692754060029984, 'timestamp': '2025-10-01 04:44:13.926774', 'step': 19460, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:13.979402', 'step': 19460, 'epoch': 3} {'type': 'loss', 'content': 0.13075262308120728, 'timestamp': '2025-10-01 04:44:13.981487', 'step': 19461, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:44:14.034932', 'step': 19461, 'epoch': 3} {'type': 'loss', 'content': 0.07138130068778992, 'timestamp': '2025-10-01 04:44:14.037136', 'step': 19462, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:14.090435', 'step': 19462, 'epoch': 3} {'type': 'loss', 'content': 0.15594853460788727, 'timestamp': '2025-10-01 04:44:14.092710', 'step': 19463, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:14.145905', 'step': 19463, 'epoch': 3} {'type': 'loss', 'content': 0.04305421561002731, 'timestamp': '2025-10-01 04:44:14.151586', 'step': 19464, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:14.204728', 'step': 19464, 'epoch': 3} {'type': 'loss', 'content': 0.14649993181228638, 'timestamp': '2025-10-01 04:44:14.207045', 'step': 19465, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:14.260320', 'step': 19465, 'epoch': 3} {'type': 'loss', 'content': 0.04935520514845848, 'timestamp': '2025-10-01 04:44:14.262564', 'step': 19466, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:14.330576', 'step': 19466, 'epoch': 3} {'type': 'loss', 'content': 0.07459046691656113, 'timestamp': '2025-10-01 04:44:14.332873', 'step': 19467, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:14.388448', 'step': 19467, 'epoch': 3} {'type': 'loss', 'content': 0.0991906151175499, 'timestamp': '2025-10-01 04:44:14.394117', 'step': 19468, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:14.447780', 'step': 19468, 'epoch': 3} {'type': 'loss', 'content': 0.06991424411535263, 'timestamp': '2025-10-01 04:44:14.449954', 'step': 19469, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:14.504868', 'step': 19469, 'epoch': 3} {'type': 'loss', 'content': 0.1557593047618866, 'timestamp': '2025-10-01 04:44:14.507296', 'step': 19470, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:14.566190', 'step': 19470, 'epoch': 3} {'type': 'loss', 'content': 0.03445575758814812, 'timestamp': '2025-10-01 04:44:14.568368', 'step': 19471, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:14.637933', 'step': 19471, 'epoch': 3} {'type': 'loss', 'content': 0.03623087331652641, 'timestamp': '2025-10-01 04:44:14.643722', 'step': 19472, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:14.697917', 'step': 19472, 'epoch': 3} {'type': 'loss', 'content': 0.1007583811879158, 'timestamp': '2025-10-01 04:44:14.700121', 'step': 19473, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:14.753135', 'step': 19473, 'epoch': 3} {'type': 'loss', 'content': 0.11946649849414825, 'timestamp': '2025-10-01 04:44:14.755503', 'step': 19474, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:14.811240', 'step': 19474, 'epoch': 3} {'type': 'loss', 'content': 0.13049887120723724, 'timestamp': '2025-10-01 04:44:14.813477', 'step': 19475, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:44:14.866977', 'step': 19475, 'epoch': 3} {'type': 'loss', 'content': 0.04760444909334183, 'timestamp': '2025-10-01 04:44:14.872659', 'step': 19476, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:14.926253', 'step': 19476, 'epoch': 3} {'type': 'loss', 'content': 0.13224704563617706, 'timestamp': '2025-10-01 04:44:14.932566', 'step': 19477, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:14.985732', 'step': 19477, 'epoch': 3} {'type': 'loss', 'content': 0.06023101136088371, 'timestamp': '2025-10-01 04:44:14.988064', 'step': 19478, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:15.042967', 'step': 19478, 'epoch': 3} {'type': 'loss', 'content': 0.04425958916544914, 'timestamp': '2025-10-01 04:44:15.045126', 'step': 19479, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:15.098543', 'step': 19479, 'epoch': 3} {'type': 'loss', 'content': 0.054354697465896606, 'timestamp': '2025-10-01 04:44:15.104394', 'step': 19480, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:15.158007', 'step': 19480, 'epoch': 3} {'type': 'loss', 'content': 0.09724978357553482, 'timestamp': '2025-10-01 04:44:15.160397', 'step': 19481, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:15.225733', 'step': 19481, 'epoch': 3} {'type': 'loss', 'content': 0.05569016933441162, 'timestamp': '2025-10-01 04:44:15.227818', 'step': 19482, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:15.281122', 'step': 19482, 'epoch': 3} {'type': 'loss', 'content': 0.08784966170787811, 'timestamp': '2025-10-01 04:44:15.283193', 'step': 19483, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:15.336434', 'step': 19483, 'epoch': 3} {'type': 'loss', 'content': 0.11012452840805054, 'timestamp': '2025-10-01 04:44:15.342418', 'step': 19484, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:15.395009', 'step': 19484, 'epoch': 3} {'type': 'loss', 'content': 0.09363093972206116, 'timestamp': '2025-10-01 04:44:15.400096', 'step': 19485, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:15.464202', 'step': 19485, 'epoch': 3} {'type': 'loss', 'content': 0.045241158455610275, 'timestamp': '2025-10-01 04:44:15.466419', 'step': 19486, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:15.519625', 'step': 19486, 'epoch': 3} {'type': 'loss', 'content': 0.10242223739624023, 'timestamp': '2025-10-01 04:44:15.521783', 'step': 19487, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:15.575661', 'step': 19487, 'epoch': 3} {'type': 'loss', 'content': 0.019968193024396896, 'timestamp': '2025-10-01 04:44:15.581937', 'step': 19488, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:15.636478', 'step': 19488, 'epoch': 3} {'type': 'loss', 'content': 0.1072823777794838, 'timestamp': '2025-10-01 04:44:15.641004', 'step': 19489, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:15.695137', 'step': 19489, 'epoch': 3} {'type': 'loss', 'content': 0.1550615429878235, 'timestamp': '2025-10-01 04:44:15.697393', 'step': 19490, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:15.750686', 'step': 19490, 'epoch': 3} {'type': 'loss', 'content': 0.04071086645126343, 'timestamp': '2025-10-01 04:44:15.752721', 'step': 19491, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:15.806732', 'step': 19491, 'epoch': 3} {'type': 'loss', 'content': 0.08220455050468445, 'timestamp': '2025-10-01 04:44:15.814987', 'step': 19492, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:15.868312', 'step': 19492, 'epoch': 3} {'type': 'loss', 'content': 0.13425463438034058, 'timestamp': '2025-10-01 04:44:15.870403', 'step': 19493, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:15.923984', 'step': 19493, 'epoch': 3} {'type': 'loss', 'content': 0.03923148661851883, 'timestamp': '2025-10-01 04:44:15.926120', 'step': 19494, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:15.979824', 'step': 19494, 'epoch': 3} {'type': 'loss', 'content': 0.09028195589780807, 'timestamp': '2025-10-01 04:44:15.981925', 'step': 19495, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:16.035011', 'step': 19495, 'epoch': 3} {'type': 'loss', 'content': 0.038314707577228546, 'timestamp': '2025-10-01 04:44:16.046795', 'step': 19496, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:44:16.099880', 'step': 19496, 'epoch': 3} {'type': 'loss', 'content': 0.05719504877924919, 'timestamp': '2025-10-01 04:44:16.101967', 'step': 19497, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:16.156885', 'step': 19497, 'epoch': 3} {'type': 'loss', 'content': 0.07699073851108551, 'timestamp': '2025-10-01 04:44:16.158949', 'step': 19498, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:16.212549', 'step': 19498, 'epoch': 3} {'type': 'loss', 'content': 0.037709012627601624, 'timestamp': '2025-10-01 04:44:16.214804', 'step': 19499, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:16.269172', 'step': 19499, 'epoch': 3} {'type': 'loss', 'content': 0.059990208595991135, 'timestamp': '2025-10-01 04:44:16.275209', 'step': 19500, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 19500', 'timestamp': '2025-10-01 04:44:16.655438', 'step': 19500, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:16.711462', 'step': 19500, 'epoch': 3} {'type': 'loss', 'content': 0.052289772778749466, 'timestamp': '2025-10-01 04:44:16.713753', 'step': 19501, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:16.768055', 'step': 19501, 'epoch': 3} {'type': 'loss', 'content': 0.08395873755216599, 'timestamp': '2025-10-01 04:44:16.770460', 'step': 19502, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:16.823830', 'step': 19502, 'epoch': 3} {'type': 'loss', 'content': 0.14641430974006653, 'timestamp': '2025-10-01 04:44:16.826269', 'step': 19503, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:16.880114', 'step': 19503, 'epoch': 3} {'type': 'loss', 'content': 0.0392204113304615, 'timestamp': '2025-10-01 04:44:16.886093', 'step': 19504, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:16.938591', 'step': 19504, 'epoch': 3} {'type': 'loss', 'content': 0.12159204483032227, 'timestamp': '2025-10-01 04:44:16.940611', 'step': 19505, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:16.993577', 'step': 19505, 'epoch': 3} {'type': 'loss', 'content': 0.05445385351777077, 'timestamp': '2025-10-01 04:44:16.995694', 'step': 19506, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:17.048753', 'step': 19506, 'epoch': 3} {'type': 'loss', 'content': 0.042023006826639175, 'timestamp': '2025-10-01 04:44:17.050864', 'step': 19507, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:17.104222', 'step': 19507, 'epoch': 3} {'type': 'loss', 'content': 0.09571170061826706, 'timestamp': '2025-10-01 04:44:17.110330', 'step': 19508, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:17.162859', 'step': 19508, 'epoch': 3} {'type': 'loss', 'content': 0.07961560785770416, 'timestamp': '2025-10-01 04:44:17.164941', 'step': 19509, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:17.218332', 'step': 19509, 'epoch': 3} {'type': 'loss', 'content': 0.0773581713438034, 'timestamp': '2025-10-01 04:44:17.220409', 'step': 19510, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:17.273606', 'step': 19510, 'epoch': 3} {'type': 'loss', 'content': 0.04046133905649185, 'timestamp': '2025-10-01 04:44:17.275688', 'step': 19511, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:44:17.329843', 'step': 19511, 'epoch': 3} {'type': 'loss', 'content': 0.06358060240745544, 'timestamp': '2025-10-01 04:44:17.335897', 'step': 19512, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:17.389113', 'step': 19512, 'epoch': 3} {'type': 'loss', 'content': 0.06652620434761047, 'timestamp': '2025-10-01 04:44:17.391771', 'step': 19513, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:17.444951', 'step': 19513, 'epoch': 3} {'type': 'loss', 'content': 0.1773410141468048, 'timestamp': '2025-10-01 04:44:17.447377', 'step': 19514, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:44:17.502167', 'step': 19514, 'epoch': 3} {'type': 'loss', 'content': 0.08134502917528152, 'timestamp': '2025-10-01 04:44:17.504330', 'step': 19515, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:17.574995', 'step': 19515, 'epoch': 3} {'type': 'loss', 'content': 0.06704340875148773, 'timestamp': '2025-10-01 04:44:17.581344', 'step': 19516, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:17.634939', 'step': 19516, 'epoch': 3} {'type': 'loss', 'content': 0.08725286275148392, 'timestamp': '2025-10-01 04:44:17.637019', 'step': 19517, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:17.691066', 'step': 19517, 'epoch': 3} {'type': 'loss', 'content': 0.052023231983184814, 'timestamp': '2025-10-01 04:44:17.693110', 'step': 19518, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:17.746639', 'step': 19518, 'epoch': 3} {'type': 'loss', 'content': 0.1118120476603508, 'timestamp': '2025-10-01 04:44:17.748703', 'step': 19519, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:17.801878', 'step': 19519, 'epoch': 3} {'type': 'loss', 'content': 0.08183532953262329, 'timestamp': '2025-10-01 04:44:17.807753', 'step': 19520, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:17.862533', 'step': 19520, 'epoch': 3} {'type': 'loss', 'content': 0.09817937761545181, 'timestamp': '2025-10-01 04:44:17.865400', 'step': 19521, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:17.920405', 'step': 19521, 'epoch': 3} {'type': 'loss', 'content': 0.05709698796272278, 'timestamp': '2025-10-01 04:44:17.923148', 'step': 19522, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:17.978897', 'step': 19522, 'epoch': 3} {'type': 'loss', 'content': 0.11993131786584854, 'timestamp': '2025-10-01 04:44:17.980938', 'step': 19523, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:18.034027', 'step': 19523, 'epoch': 3} {'type': 'loss', 'content': 0.09843863546848297, 'timestamp': '2025-10-01 04:44:18.039907', 'step': 19524, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:18.094026', 'step': 19524, 'epoch': 3} {'type': 'loss', 'content': 0.09842289239168167, 'timestamp': '2025-10-01 04:44:18.096088', 'step': 19525, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:18.149534', 'step': 19525, 'epoch': 3} {'type': 'loss', 'content': 0.0772586464881897, 'timestamp': '2025-10-01 04:44:18.151607', 'step': 19526, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:18.211666', 'step': 19526, 'epoch': 3} {'type': 'loss', 'content': 0.06895444542169571, 'timestamp': '2025-10-01 04:44:18.213940', 'step': 19527, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:18.267427', 'step': 19527, 'epoch': 3} {'type': 'loss', 'content': 0.05673250928521156, 'timestamp': '2025-10-01 04:44:18.274078', 'step': 19528, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:18.340319', 'step': 19528, 'epoch': 3} {'type': 'loss', 'content': 0.056180261075496674, 'timestamp': '2025-10-01 04:44:18.342628', 'step': 19529, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:18.397699', 'step': 19529, 'epoch': 3} {'type': 'loss', 'content': 0.059774693101644516, 'timestamp': '2025-10-01 04:44:18.399768', 'step': 19530, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:18.453093', 'step': 19530, 'epoch': 3} {'type': 'loss', 'content': 0.13291509449481964, 'timestamp': '2025-10-01 04:44:18.455319', 'step': 19531, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:18.508737', 'step': 19531, 'epoch': 3} {'type': 'loss', 'content': 0.10602319240570068, 'timestamp': '2025-10-01 04:44:18.514630', 'step': 19532, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:18.568780', 'step': 19532, 'epoch': 3} {'type': 'loss', 'content': 0.2086009532213211, 'timestamp': '2025-10-01 04:44:18.571297', 'step': 19533, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:18.624372', 'step': 19533, 'epoch': 3} {'type': 'loss', 'content': 0.09239137172698975, 'timestamp': '2025-10-01 04:44:18.626638', 'step': 19534, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:18.680072', 'step': 19534, 'epoch': 3} {'type': 'loss', 'content': 0.05639265105128288, 'timestamp': '2025-10-01 04:44:18.682216', 'step': 19535, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:18.735392', 'step': 19535, 'epoch': 3} {'type': 'loss', 'content': 0.050819698721170425, 'timestamp': '2025-10-01 04:44:18.741207', 'step': 19536, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:18.793891', 'step': 19536, 'epoch': 3} {'type': 'loss', 'content': 0.042334649711847305, 'timestamp': '2025-10-01 04:44:18.796098', 'step': 19537, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:44:18.849150', 'step': 19537, 'epoch': 3} {'type': 'loss', 'content': 0.04688267409801483, 'timestamp': '2025-10-01 04:44:18.851337', 'step': 19538, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:18.905273', 'step': 19538, 'epoch': 3} {'type': 'loss', 'content': 0.06458650529384613, 'timestamp': '2025-10-01 04:44:18.907459', 'step': 19539, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:18.960749', 'step': 19539, 'epoch': 3} {'type': 'loss', 'content': 0.17065627872943878, 'timestamp': '2025-10-01 04:44:18.966421', 'step': 19540, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:19.018995', 'step': 19540, 'epoch': 3} {'type': 'loss', 'content': 0.06794065237045288, 'timestamp': '2025-10-01 04:44:19.021210', 'step': 19541, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:44:19.074250', 'step': 19541, 'epoch': 3} {'type': 'loss', 'content': 0.08978671580553055, 'timestamp': '2025-10-01 04:44:19.076640', 'step': 19542, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:19.131096', 'step': 19542, 'epoch': 3} {'type': 'loss', 'content': 0.033301178365945816, 'timestamp': '2025-10-01 04:44:19.135359', 'step': 19543, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:19.197757', 'step': 19543, 'epoch': 3} {'type': 'loss', 'content': 0.03101949393749237, 'timestamp': '2025-10-01 04:44:19.203487', 'step': 19544, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:19.255816', 'step': 19544, 'epoch': 3} {'type': 'loss', 'content': 0.23322023451328278, 'timestamp': '2025-10-01 04:44:19.258000', 'step': 19545, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:19.327714', 'step': 19545, 'epoch': 3} {'type': 'loss', 'content': 0.11331132799386978, 'timestamp': '2025-10-01 04:44:19.329859', 'step': 19546, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:44:19.384328', 'step': 19546, 'epoch': 3} {'type': 'loss', 'content': 0.06391265988349915, 'timestamp': '2025-10-01 04:44:19.390865', 'step': 19547, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:19.447645', 'step': 19547, 'epoch': 3} {'type': 'loss', 'content': 0.02864614687860012, 'timestamp': '2025-10-01 04:44:19.457285', 'step': 19548, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:19.510899', 'step': 19548, 'epoch': 3} {'type': 'loss', 'content': 0.13549329340457916, 'timestamp': '2025-10-01 04:44:19.512997', 'step': 19549, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:19.579586', 'step': 19549, 'epoch': 3} {'type': 'loss', 'content': 0.09190646559000015, 'timestamp': '2025-10-01 04:44:19.581683', 'step': 19550, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:19.636030', 'step': 19550, 'epoch': 3} {'type': 'loss', 'content': 0.03912600874900818, 'timestamp': '2025-10-01 04:44:19.638144', 'step': 19551, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:19.691864', 'step': 19551, 'epoch': 3} {'type': 'loss', 'content': 0.09849508851766586, 'timestamp': '2025-10-01 04:44:19.698247', 'step': 19552, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:19.752325', 'step': 19552, 'epoch': 3} {'type': 'loss', 'content': 0.09719076007604599, 'timestamp': '2025-10-01 04:44:19.754398', 'step': 19553, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:19.809128', 'step': 19553, 'epoch': 3} {'type': 'loss', 'content': 0.053637899458408356, 'timestamp': '2025-10-01 04:44:19.811331', 'step': 19554, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:19.864499', 'step': 19554, 'epoch': 3} {'type': 'loss', 'content': 0.06937111914157867, 'timestamp': '2025-10-01 04:44:19.872993', 'step': 19555, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:19.926377', 'step': 19555, 'epoch': 3} {'type': 'loss', 'content': 0.059254202991724014, 'timestamp': '2025-10-01 04:44:19.932675', 'step': 19556, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:19.985474', 'step': 19556, 'epoch': 3} {'type': 'loss', 'content': 0.1902141124010086, 'timestamp': '2025-10-01 04:44:19.987907', 'step': 19557, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:20.041348', 'step': 19557, 'epoch': 3} {'type': 'loss', 'content': 0.13958749175071716, 'timestamp': '2025-10-01 04:44:20.043449', 'step': 19558, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:20.104435', 'step': 19558, 'epoch': 3} {'type': 'loss', 'content': 0.1797848641872406, 'timestamp': '2025-10-01 04:44:20.109442', 'step': 19559, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:20.164005', 'step': 19559, 'epoch': 3} {'type': 'loss', 'content': 0.13602247834205627, 'timestamp': '2025-10-01 04:44:20.169881', 'step': 19560, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:20.222297', 'step': 19560, 'epoch': 3} {'type': 'loss', 'content': 0.07640159130096436, 'timestamp': '2025-10-01 04:44:20.224354', 'step': 19561, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:44:20.277695', 'step': 19561, 'epoch': 3} {'type': 'loss', 'content': 0.08012014627456665, 'timestamp': '2025-10-01 04:44:20.279774', 'step': 19562, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:20.332693', 'step': 19562, 'epoch': 3} {'type': 'loss', 'content': 0.02633378654718399, 'timestamp': '2025-10-01 04:44:20.334777', 'step': 19563, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:20.388364', 'step': 19563, 'epoch': 3} {'type': 'loss', 'content': 0.0970827266573906, 'timestamp': '2025-10-01 04:44:20.394064', 'step': 19564, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:20.446900', 'step': 19564, 'epoch': 3} {'type': 'loss', 'content': 0.10803437232971191, 'timestamp': '2025-10-01 04:44:20.449016', 'step': 19565, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:20.506096', 'step': 19565, 'epoch': 3} {'type': 'loss', 'content': 0.06827696412801743, 'timestamp': '2025-10-01 04:44:20.513977', 'step': 19566, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:20.568823', 'step': 19566, 'epoch': 3} {'type': 'loss', 'content': 0.13238996267318726, 'timestamp': '2025-10-01 04:44:20.574609', 'step': 19567, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:20.628859', 'step': 19567, 'epoch': 3} {'type': 'loss', 'content': 0.06802237033843994, 'timestamp': '2025-10-01 04:44:20.635024', 'step': 19568, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:20.688577', 'step': 19568, 'epoch': 3} {'type': 'loss', 'content': 0.04907691106200218, 'timestamp': '2025-10-01 04:44:20.697404', 'step': 19569, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:20.750814', 'step': 19569, 'epoch': 3} {'type': 'loss', 'content': 0.10910769551992416, 'timestamp': '2025-10-01 04:44:20.753171', 'step': 19570, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:20.806821', 'step': 19570, 'epoch': 3} {'type': 'loss', 'content': 0.1809626817703247, 'timestamp': '2025-10-01 04:44:20.810935', 'step': 19571, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:20.865945', 'step': 19571, 'epoch': 3} {'type': 'loss', 'content': 0.0731939896941185, 'timestamp': '2025-10-01 04:44:20.871930', 'step': 19572, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-01 04:44:34.089528', 'step': 19572, 'epoch': 3} {'type': 'pplx', 'content': 11079.91757068143, 'timestamp': '2025-10-01 04:44:34.092681', 'step': 19572, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:34.145918', 'step': 19572, 'epoch': 3} {'type': 'loss', 'content': 0.08038096874952316, 'timestamp': '2025-10-01 04:44:34.147770', 'step': 19573, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:34.203678', 'step': 19573, 'epoch': 3} {'type': 'loss', 'content': 0.1905565857887268, 'timestamp': '2025-10-01 04:44:34.205814', 'step': 19574, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:34.260825', 'step': 19574, 'epoch': 3} {'type': 'loss', 'content': 0.12145664542913437, 'timestamp': '2025-10-01 04:44:34.263108', 'step': 19575, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:34.316753', 'step': 19575, 'epoch': 3} {'type': 'loss', 'content': 0.04047296196222305, 'timestamp': '2025-10-01 04:44:34.322781', 'step': 19576, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:34.376399', 'step': 19576, 'epoch': 3} {'type': 'loss', 'content': 0.0704035609960556, 'timestamp': '2025-10-01 04:44:34.378481', 'step': 19577, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:34.431742', 'step': 19577, 'epoch': 3} {'type': 'loss', 'content': 0.12326275557279587, 'timestamp': '2025-10-01 04:44:34.434105', 'step': 19578, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:34.488478', 'step': 19578, 'epoch': 3} {'type': 'loss', 'content': 0.07725682854652405, 'timestamp': '2025-10-01 04:44:34.490332', 'step': 19579, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:34.543617', 'step': 19579, 'epoch': 3} {'type': 'loss', 'content': 0.16009192168712616, 'timestamp': '2025-10-01 04:44:34.549769', 'step': 19580, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:34.615556', 'step': 19580, 'epoch': 3} {'type': 'loss', 'content': 0.03684264421463013, 'timestamp': '2025-10-01 04:44:34.617762', 'step': 19581, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:34.671539', 'step': 19581, 'epoch': 3} {'type': 'loss', 'content': 0.08303138613700867, 'timestamp': '2025-10-01 04:44:34.673893', 'step': 19582, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:34.726921', 'step': 19582, 'epoch': 3} {'type': 'loss', 'content': 0.10106534510850906, 'timestamp': '2025-10-01 04:44:34.729024', 'step': 19583, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:34.782191', 'step': 19583, 'epoch': 3} {'type': 'loss', 'content': 0.10258865356445312, 'timestamp': '2025-10-01 04:44:34.787974', 'step': 19584, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:34.840325', 'step': 19584, 'epoch': 3} {'type': 'loss', 'content': 0.06190581992268562, 'timestamp': '2025-10-01 04:44:34.842607', 'step': 19585, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:34.896440', 'step': 19585, 'epoch': 3} {'type': 'loss', 'content': 0.10545049607753754, 'timestamp': '2025-10-01 04:44:34.898817', 'step': 19586, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:34.953325', 'step': 19586, 'epoch': 3} {'type': 'loss', 'content': 0.030827710404992104, 'timestamp': '2025-10-01 04:44:34.955422', 'step': 19587, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:35.009955', 'step': 19587, 'epoch': 3} {'type': 'loss', 'content': 0.13064968585968018, 'timestamp': '2025-10-01 04:44:35.016522', 'step': 19588, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:35.069645', 'step': 19588, 'epoch': 3} {'type': 'loss', 'content': 0.052404291927814484, 'timestamp': '2025-10-01 04:44:35.072086', 'step': 19589, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:35.126575', 'step': 19589, 'epoch': 3} {'type': 'loss', 'content': 0.11522060632705688, 'timestamp': '2025-10-01 04:44:35.130917', 'step': 19590, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:35.186827', 'step': 19590, 'epoch': 3} {'type': 'loss', 'content': 0.12308448553085327, 'timestamp': '2025-10-01 04:44:35.189331', 'step': 19591, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:35.257015', 'step': 19591, 'epoch': 3} {'type': 'loss', 'content': 0.17934879660606384, 'timestamp': '2025-10-01 04:44:35.262852', 'step': 19592, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:35.316581', 'step': 19592, 'epoch': 3} {'type': 'loss', 'content': 0.10835874080657959, 'timestamp': '2025-10-01 04:44:35.318999', 'step': 19593, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:35.373072', 'step': 19593, 'epoch': 3} {'type': 'loss', 'content': 0.09406326711177826, 'timestamp': '2025-10-01 04:44:35.375157', 'step': 19594, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:35.435912', 'step': 19594, 'epoch': 3} {'type': 'loss', 'content': 0.16133582592010498, 'timestamp': '2025-10-01 04:44:35.438413', 'step': 19595, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:35.492548', 'step': 19595, 'epoch': 3} {'type': 'loss', 'content': 0.09196342527866364, 'timestamp': '2025-10-01 04:44:35.498658', 'step': 19596, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:35.552774', 'step': 19596, 'epoch': 3} {'type': 'loss', 'content': 0.10684555768966675, 'timestamp': '2025-10-01 04:44:35.555722', 'step': 19597, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:35.610130', 'step': 19597, 'epoch': 3} {'type': 'loss', 'content': 0.10317189991474152, 'timestamp': '2025-10-01 04:44:35.612702', 'step': 19598, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:35.666875', 'step': 19598, 'epoch': 3} {'type': 'loss', 'content': 0.16079489886760712, 'timestamp': '2025-10-01 04:44:35.671599', 'step': 19599, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:35.726705', 'step': 19599, 'epoch': 3} {'type': 'loss', 'content': 0.07314823567867279, 'timestamp': '2025-10-01 04:44:35.732465', 'step': 19600, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:35.785898', 'step': 19600, 'epoch': 3} {'type': 'loss', 'content': 0.07278215885162354, 'timestamp': '2025-10-01 04:44:35.787694', 'step': 19601, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:35.841620', 'step': 19601, 'epoch': 3} {'type': 'loss', 'content': 0.091206394135952, 'timestamp': '2025-10-01 04:44:35.844328', 'step': 19602, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:35.898828', 'step': 19602, 'epoch': 3} {'type': 'loss', 'content': 0.04150749370455742, 'timestamp': '2025-10-01 04:44:35.901529', 'step': 19603, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:35.956058', 'step': 19603, 'epoch': 3} {'type': 'loss', 'content': 0.11920074373483658, 'timestamp': '2025-10-01 04:44:35.962161', 'step': 19604, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:36.023991', 'step': 19604, 'epoch': 3} {'type': 'loss', 'content': 0.09085918217897415, 'timestamp': '2025-10-01 04:44:36.026628', 'step': 19605, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:36.090769', 'step': 19605, 'epoch': 3} {'type': 'loss', 'content': 0.09905875474214554, 'timestamp': '2025-10-01 04:44:36.092665', 'step': 19606, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:36.147664', 'step': 19606, 'epoch': 3} {'type': 'loss', 'content': 0.10514669120311737, 'timestamp': '2025-10-01 04:44:36.150166', 'step': 19607, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:36.205212', 'step': 19607, 'epoch': 3} {'type': 'loss', 'content': 0.09535430371761322, 'timestamp': '2025-10-01 04:44:36.210962', 'step': 19608, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:36.267738', 'step': 19608, 'epoch': 3} {'type': 'loss', 'content': 0.03761561959981918, 'timestamp': '2025-10-01 04:44:36.270081', 'step': 19609, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:36.325129', 'step': 19609, 'epoch': 3} {'type': 'loss', 'content': 0.06939568370580673, 'timestamp': '2025-10-01 04:44:36.327501', 'step': 19610, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:36.382955', 'step': 19610, 'epoch': 3} {'type': 'loss', 'content': 0.11232655495405197, 'timestamp': '2025-10-01 04:44:36.385398', 'step': 19611, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:36.440482', 'step': 19611, 'epoch': 3} {'type': 'loss', 'content': 0.0625067949295044, 'timestamp': '2025-10-01 04:44:36.446324', 'step': 19612, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:36.499890', 'step': 19612, 'epoch': 3} {'type': 'loss', 'content': 0.05845734104514122, 'timestamp': '2025-10-01 04:44:36.501705', 'step': 19613, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:36.555330', 'step': 19613, 'epoch': 3} {'type': 'loss', 'content': 0.09170863032341003, 'timestamp': '2025-10-01 04:44:36.557221', 'step': 19614, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:36.611531', 'step': 19614, 'epoch': 3} {'type': 'loss', 'content': 0.06687041372060776, 'timestamp': '2025-10-01 04:44:36.613398', 'step': 19615, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:44:36.666503', 'step': 19615, 'epoch': 3} {'type': 'loss', 'content': 0.08323357254266739, 'timestamp': '2025-10-01 04:44:36.672365', 'step': 19616, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:36.725012', 'step': 19616, 'epoch': 3} {'type': 'loss', 'content': 0.05087464302778244, 'timestamp': '2025-10-01 04:44:36.727125', 'step': 19617, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:44:36.781061', 'step': 19617, 'epoch': 3} {'type': 'loss', 'content': 0.14831669628620148, 'timestamp': '2025-10-01 04:44:36.783382', 'step': 19618, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:36.836692', 'step': 19618, 'epoch': 3} {'type': 'loss', 'content': 0.11380015313625336, 'timestamp': '2025-10-01 04:44:36.838750', 'step': 19619, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:36.892800', 'step': 19619, 'epoch': 3} {'type': 'loss', 'content': 0.22973540425300598, 'timestamp': '2025-10-01 04:44:36.898724', 'step': 19620, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:36.952026', 'step': 19620, 'epoch': 3} {'type': 'loss', 'content': 0.09804055839776993, 'timestamp': '2025-10-01 04:44:36.956743', 'step': 19621, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:37.011419', 'step': 19621, 'epoch': 3} {'type': 'loss', 'content': 0.07800699770450592, 'timestamp': '2025-10-01 04:44:37.024401', 'step': 19622, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:37.078096', 'step': 19622, 'epoch': 3} {'type': 'loss', 'content': 0.062214650213718414, 'timestamp': '2025-10-01 04:44:37.080447', 'step': 19623, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:37.143235', 'step': 19623, 'epoch': 3} {'type': 'loss', 'content': 0.0801529735326767, 'timestamp': '2025-10-01 04:44:37.149096', 'step': 19624, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:37.202330', 'step': 19624, 'epoch': 3} {'type': 'loss', 'content': 0.059876687824726105, 'timestamp': '2025-10-01 04:44:37.205243', 'step': 19625, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:37.258826', 'step': 19625, 'epoch': 3} {'type': 'loss', 'content': 0.06978582590818405, 'timestamp': '2025-10-01 04:44:37.260931', 'step': 19626, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:37.314743', 'step': 19626, 'epoch': 3} {'type': 'loss', 'content': 0.048627834767103195, 'timestamp': '2025-10-01 04:44:37.318817', 'step': 19627, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:37.372297', 'step': 19627, 'epoch': 3} {'type': 'loss', 'content': 0.07126891613006592, 'timestamp': '2025-10-01 04:44:37.377840', 'step': 19628, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:44:37.441414', 'step': 19628, 'epoch': 3} {'type': 'loss', 'content': 0.034874264150857925, 'timestamp': '2025-10-01 04:44:37.443631', 'step': 19629, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:37.497524', 'step': 19629, 'epoch': 3} {'type': 'loss', 'content': 0.06828623265028, 'timestamp': '2025-10-01 04:44:37.501939', 'step': 19630, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:37.555596', 'step': 19630, 'epoch': 3} {'type': 'loss', 'content': 0.11881802976131439, 'timestamp': '2025-10-01 04:44:37.557741', 'step': 19631, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:37.611541', 'step': 19631, 'epoch': 3} {'type': 'loss', 'content': 0.1287396103143692, 'timestamp': '2025-10-01 04:44:37.617332', 'step': 19632, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:37.671141', 'step': 19632, 'epoch': 3} {'type': 'loss', 'content': 0.1326337307691574, 'timestamp': '2025-10-01 04:44:37.675095', 'step': 19633, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:37.728494', 'step': 19633, 'epoch': 3} {'type': 'loss', 'content': 0.2297360748052597, 'timestamp': '2025-10-01 04:44:37.730684', 'step': 19634, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:37.785139', 'step': 19634, 'epoch': 3} {'type': 'loss', 'content': 0.1719541698694229, 'timestamp': '2025-10-01 04:44:37.787288', 'step': 19635, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:37.844049', 'step': 19635, 'epoch': 3} {'type': 'loss', 'content': 0.1597049981355667, 'timestamp': '2025-10-01 04:44:37.850782', 'step': 19636, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:37.904318', 'step': 19636, 'epoch': 3} {'type': 'loss', 'content': 0.10683489590883255, 'timestamp': '2025-10-01 04:44:37.906520', 'step': 19637, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:37.959706', 'step': 19637, 'epoch': 3} {'type': 'loss', 'content': 0.06720767170190811, 'timestamp': '2025-10-01 04:44:37.962537', 'step': 19638, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:38.016433', 'step': 19638, 'epoch': 3} {'type': 'loss', 'content': 0.11226905137300491, 'timestamp': '2025-10-01 04:44:38.018553', 'step': 19639, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:38.073706', 'step': 19639, 'epoch': 3} {'type': 'loss', 'content': 0.034097593277692795, 'timestamp': '2025-10-01 04:44:38.079507', 'step': 19640, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:38.132905', 'step': 19640, 'epoch': 3} {'type': 'loss', 'content': 0.16689260303974152, 'timestamp': '2025-10-01 04:44:38.134992', 'step': 19641, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:38.190873', 'step': 19641, 'epoch': 3} {'type': 'loss', 'content': 0.07501894980669022, 'timestamp': '2025-10-01 04:44:38.198382', 'step': 19642, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:38.254304', 'step': 19642, 'epoch': 3} {'type': 'loss', 'content': 0.05969664081931114, 'timestamp': '2025-10-01 04:44:38.256462', 'step': 19643, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:38.310220', 'step': 19643, 'epoch': 3} {'type': 'loss', 'content': 0.025335056707262993, 'timestamp': '2025-10-01 04:44:38.315997', 'step': 19644, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:38.371583', 'step': 19644, 'epoch': 3} {'type': 'loss', 'content': 0.049764394760131836, 'timestamp': '2025-10-01 04:44:38.373708', 'step': 19645, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:38.429473', 'step': 19645, 'epoch': 3} {'type': 'loss', 'content': 0.2058858424425125, 'timestamp': '2025-10-01 04:44:38.437798', 'step': 19646, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:38.491384', 'step': 19646, 'epoch': 3} {'type': 'loss', 'content': 0.1661287397146225, 'timestamp': '2025-10-01 04:44:38.493568', 'step': 19647, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:44:38.549593', 'step': 19647, 'epoch': 3} {'type': 'loss', 'content': 0.17655539512634277, 'timestamp': '2025-10-01 04:44:38.555180', 'step': 19648, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:38.610190', 'step': 19648, 'epoch': 3} {'type': 'loss', 'content': 0.03948596864938736, 'timestamp': '2025-10-01 04:44:38.618018', 'step': 19649, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:38.671345', 'step': 19649, 'epoch': 3} {'type': 'loss', 'content': 0.039002858102321625, 'timestamp': '2025-10-01 04:44:38.673791', 'step': 19650, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:38.727538', 'step': 19650, 'epoch': 3} {'type': 'loss', 'content': 0.13144659996032715, 'timestamp': '2025-10-01 04:44:38.730361', 'step': 19651, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:38.785717', 'step': 19651, 'epoch': 3} {'type': 'loss', 'content': 0.04753813147544861, 'timestamp': '2025-10-01 04:44:38.793593', 'step': 19652, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:38.846386', 'step': 19652, 'epoch': 3} {'type': 'loss', 'content': 0.11449144035577774, 'timestamp': '2025-10-01 04:44:38.848450', 'step': 19653, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:38.912428', 'step': 19653, 'epoch': 3} {'type': 'loss', 'content': 0.06511163711547852, 'timestamp': '2025-10-01 04:44:38.914479', 'step': 19654, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:44:38.969101', 'step': 19654, 'epoch': 3} {'type': 'loss', 'content': 0.18643802404403687, 'timestamp': '2025-10-01 04:44:38.980125', 'step': 19655, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:39.033544', 'step': 19655, 'epoch': 3} {'type': 'loss', 'content': 0.09870204329490662, 'timestamp': '2025-10-01 04:44:39.039305', 'step': 19656, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:39.091969', 'step': 19656, 'epoch': 3} {'type': 'loss', 'content': 0.03435872495174408, 'timestamp': '2025-10-01 04:44:39.094271', 'step': 19657, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:39.147173', 'step': 19657, 'epoch': 3} {'type': 'loss', 'content': 0.10497844219207764, 'timestamp': '2025-10-01 04:44:39.162604', 'step': 19658, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:39.228501', 'step': 19658, 'epoch': 3} {'type': 'loss', 'content': 0.05872157961130142, 'timestamp': '2025-10-01 04:44:39.230671', 'step': 19659, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:39.285354', 'step': 19659, 'epoch': 3} {'type': 'loss', 'content': 0.07791781425476074, 'timestamp': '2025-10-01 04:44:39.300059', 'step': 19660, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:39.358640', 'step': 19660, 'epoch': 3} {'type': 'loss', 'content': 0.13584721088409424, 'timestamp': '2025-10-01 04:44:39.360614', 'step': 19661, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:39.414716', 'step': 19661, 'epoch': 3} {'type': 'loss', 'content': 0.04426103085279465, 'timestamp': '2025-10-01 04:44:39.417503', 'step': 19662, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:39.471126', 'step': 19662, 'epoch': 3} {'type': 'loss', 'content': 0.07155932486057281, 'timestamp': '2025-10-01 04:44:39.473740', 'step': 19663, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:44:39.528561', 'step': 19663, 'epoch': 3} {'type': 'loss', 'content': 0.029852574691176414, 'timestamp': '2025-10-01 04:44:39.534536', 'step': 19664, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:44:39.587535', 'step': 19664, 'epoch': 3} {'type': 'loss', 'content': 0.0670565590262413, 'timestamp': '2025-10-01 04:44:39.589831', 'step': 19665, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:39.650759', 'step': 19665, 'epoch': 3} {'type': 'loss', 'content': 0.17832325398921967, 'timestamp': '2025-10-01 04:44:39.653033', 'step': 19666, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:39.706543', 'step': 19666, 'epoch': 3} {'type': 'loss', 'content': 0.0733063668012619, 'timestamp': '2025-10-01 04:44:39.708651', 'step': 19667, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:39.761689', 'step': 19667, 'epoch': 3} {'type': 'loss', 'content': 0.04198981076478958, 'timestamp': '2025-10-01 04:44:39.767394', 'step': 19668, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:39.820097', 'step': 19668, 'epoch': 3} {'type': 'loss', 'content': 0.1067308858036995, 'timestamp': '2025-10-01 04:44:39.822392', 'step': 19669, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:39.876106', 'step': 19669, 'epoch': 3} {'type': 'loss', 'content': 0.023220321163535118, 'timestamp': '2025-10-01 04:44:39.886226', 'step': 19670, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:39.939548', 'step': 19670, 'epoch': 3} {'type': 'loss', 'content': 0.13619959354400635, 'timestamp': '2025-10-01 04:44:39.941700', 'step': 19671, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:44:39.995400', 'step': 19671, 'epoch': 3} {'type': 'loss', 'content': 0.1014300063252449, 'timestamp': '2025-10-01 04:44:40.001181', 'step': 19672, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:40.053816', 'step': 19672, 'epoch': 3} {'type': 'loss', 'content': 0.16474030911922455, 'timestamp': '2025-10-01 04:44:40.056055', 'step': 19673, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:40.116546', 'step': 19673, 'epoch': 3} {'type': 'loss', 'content': 0.16565470397472382, 'timestamp': '2025-10-01 04:44:40.118780', 'step': 19674, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:40.171507', 'step': 19674, 'epoch': 3} {'type': 'loss', 'content': 0.14264318346977234, 'timestamp': '2025-10-01 04:44:40.173769', 'step': 19675, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:40.228075', 'step': 19675, 'epoch': 3} {'type': 'loss', 'content': 0.11126726865768433, 'timestamp': '2025-10-01 04:44:40.233856', 'step': 19676, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:40.287303', 'step': 19676, 'epoch': 3} {'type': 'loss', 'content': 0.10349208116531372, 'timestamp': '2025-10-01 04:44:40.289912', 'step': 19677, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:40.342962', 'step': 19677, 'epoch': 3} {'type': 'loss', 'content': 0.09189677983522415, 'timestamp': '2025-10-01 04:44:40.345024', 'step': 19678, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:44:40.398565', 'step': 19678, 'epoch': 3} {'type': 'loss', 'content': 0.10286783427000046, 'timestamp': '2025-10-01 04:44:40.400801', 'step': 19679, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:40.454472', 'step': 19679, 'epoch': 3} {'type': 'loss', 'content': 0.05448955297470093, 'timestamp': '2025-10-01 04:44:40.460398', 'step': 19680, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:40.513000', 'step': 19680, 'epoch': 3} {'type': 'loss', 'content': 0.13640403747558594, 'timestamp': '2025-10-01 04:44:40.515326', 'step': 19681, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:40.570783', 'step': 19681, 'epoch': 3} {'type': 'loss', 'content': 0.03063249960541725, 'timestamp': '2025-10-01 04:44:40.585339', 'step': 19682, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:40.638747', 'step': 19682, 'epoch': 3} {'type': 'loss', 'content': 0.037504833191633224, 'timestamp': '2025-10-01 04:44:40.641100', 'step': 19683, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:40.694822', 'step': 19683, 'epoch': 3} {'type': 'loss', 'content': 0.09244014322757721, 'timestamp': '2025-10-01 04:44:40.700638', 'step': 19684, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:40.767579', 'step': 19684, 'epoch': 3} {'type': 'loss', 'content': 0.10307051241397858, 'timestamp': '2025-10-01 04:44:40.769826', 'step': 19685, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:40.825631', 'step': 19685, 'epoch': 3} {'type': 'loss', 'content': 0.18471018970012665, 'timestamp': '2025-10-01 04:44:40.828109', 'step': 19686, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:40.882832', 'step': 19686, 'epoch': 3} {'type': 'loss', 'content': 0.09518320113420486, 'timestamp': '2025-10-01 04:44:40.884882', 'step': 19687, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:40.939352', 'step': 19687, 'epoch': 3} {'type': 'loss', 'content': 0.09698990732431412, 'timestamp': '2025-10-01 04:44:40.945667', 'step': 19688, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:40.999851', 'step': 19688, 'epoch': 3} {'type': 'loss', 'content': 0.07727274298667908, 'timestamp': '2025-10-01 04:44:41.002029', 'step': 19689, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:41.066230', 'step': 19689, 'epoch': 3} {'type': 'loss', 'content': 0.13128286600112915, 'timestamp': '2025-10-01 04:44:41.068464', 'step': 19690, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:41.123250', 'step': 19690, 'epoch': 3} {'type': 'loss', 'content': 0.05198583006858826, 'timestamp': '2025-10-01 04:44:41.125425', 'step': 19691, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:41.190821', 'step': 19691, 'epoch': 3} {'type': 'loss', 'content': 0.03576663136482239, 'timestamp': '2025-10-01 04:44:41.197090', 'step': 19692, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:41.250707', 'step': 19692, 'epoch': 3} {'type': 'loss', 'content': 0.14921920001506805, 'timestamp': '2025-10-01 04:44:41.252425', 'step': 19693, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:41.305702', 'step': 19693, 'epoch': 3} {'type': 'loss', 'content': 0.14171577990055084, 'timestamp': '2025-10-01 04:44:41.307779', 'step': 19694, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:41.361087', 'step': 19694, 'epoch': 3} {'type': 'loss', 'content': 0.11206613481044769, 'timestamp': '2025-10-01 04:44:41.363539', 'step': 19695, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:41.416473', 'step': 19695, 'epoch': 3} {'type': 'loss', 'content': 0.042040739208459854, 'timestamp': '2025-10-01 04:44:41.422371', 'step': 19696, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:41.474897', 'step': 19696, 'epoch': 3} {'type': 'loss', 'content': 0.04522142559289932, 'timestamp': '2025-10-01 04:44:41.477030', 'step': 19697, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:41.530484', 'step': 19697, 'epoch': 3} {'type': 'loss', 'content': 0.059760548174381256, 'timestamp': '2025-10-01 04:44:41.533072', 'step': 19698, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:41.586433', 'step': 19698, 'epoch': 3} {'type': 'loss', 'content': 0.08906298130750656, 'timestamp': '2025-10-01 04:44:41.588822', 'step': 19699, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:41.642833', 'step': 19699, 'epoch': 3} {'type': 'loss', 'content': 0.06177089735865593, 'timestamp': '2025-10-01 04:44:41.648634', 'step': 19700, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:44:41.704115', 'step': 19700, 'epoch': 3} {'type': 'loss', 'content': 0.1984236240386963, 'timestamp': '2025-10-01 04:44:41.706161', 'step': 19701, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:41.773707', 'step': 19701, 'epoch': 3} {'type': 'loss', 'content': 0.1409452110528946, 'timestamp': '2025-10-01 04:44:41.775872', 'step': 19702, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:41.852394', 'step': 19702, 'epoch': 3} {'type': 'loss', 'content': 0.07262381166219711, 'timestamp': '2025-10-01 04:44:41.858656', 'step': 19703, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:41.911557', 'step': 19703, 'epoch': 3} {'type': 'loss', 'content': 0.17045941948890686, 'timestamp': '2025-10-01 04:44:41.917577', 'step': 19704, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:41.970371', 'step': 19704, 'epoch': 3} {'type': 'loss', 'content': 0.06391151994466782, 'timestamp': '2025-10-01 04:44:41.972456', 'step': 19705, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:42.026385', 'step': 19705, 'epoch': 3} {'type': 'loss', 'content': 0.054338421672582626, 'timestamp': '2025-10-01 04:44:42.028528', 'step': 19706, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:42.081817', 'step': 19706, 'epoch': 3} {'type': 'loss', 'content': 0.09166532754898071, 'timestamp': '2025-10-01 04:44:42.083873', 'step': 19707, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:42.137423', 'step': 19707, 'epoch': 3} {'type': 'loss', 'content': 0.037678398191928864, 'timestamp': '2025-10-01 04:44:42.143454', 'step': 19708, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:42.196738', 'step': 19708, 'epoch': 3} {'type': 'loss', 'content': 0.08480522036552429, 'timestamp': '2025-10-01 04:44:42.201850', 'step': 19709, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:42.257336', 'step': 19709, 'epoch': 3} {'type': 'loss', 'content': 0.09092186391353607, 'timestamp': '2025-10-01 04:44:42.259492', 'step': 19710, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:42.314185', 'step': 19710, 'epoch': 3} {'type': 'loss', 'content': 0.05568048357963562, 'timestamp': '2025-10-01 04:44:42.316447', 'step': 19711, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:42.377427', 'step': 19711, 'epoch': 3} {'type': 'loss', 'content': 0.17104406654834747, 'timestamp': '2025-10-01 04:44:42.384292', 'step': 19712, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:42.440035', 'step': 19712, 'epoch': 3} {'type': 'loss', 'content': 0.18562091886997223, 'timestamp': '2025-10-01 04:44:42.442547', 'step': 19713, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:42.514812', 'step': 19713, 'epoch': 3} {'type': 'loss', 'content': 0.08620929718017578, 'timestamp': '2025-10-01 04:44:42.517043', 'step': 19714, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:42.573109', 'step': 19714, 'epoch': 3} {'type': 'loss', 'content': 0.08428927510976791, 'timestamp': '2025-10-01 04:44:42.575608', 'step': 19715, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:42.644410', 'step': 19715, 'epoch': 3} {'type': 'loss', 'content': 0.05707010254263878, 'timestamp': '2025-10-01 04:44:42.650891', 'step': 19716, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:44:42.708743', 'step': 19716, 'epoch': 3} {'type': 'loss', 'content': 0.12923870980739594, 'timestamp': '2025-10-01 04:44:42.711115', 'step': 19717, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:42.766288', 'step': 19717, 'epoch': 3} {'type': 'loss', 'content': 0.12470393627882004, 'timestamp': '2025-10-01 04:44:42.768490', 'step': 19718, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:42.834089', 'step': 19718, 'epoch': 3} {'type': 'loss', 'content': 0.0742354542016983, 'timestamp': '2025-10-01 04:44:42.836291', 'step': 19719, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:42.891599', 'step': 19719, 'epoch': 3} {'type': 'loss', 'content': 0.06737131625413895, 'timestamp': '2025-10-01 04:44:42.898029', 'step': 19720, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:42.954028', 'step': 19720, 'epoch': 3} {'type': 'loss', 'content': 0.07324390858411789, 'timestamp': '2025-10-01 04:44:42.956192', 'step': 19721, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:43.011296', 'step': 19721, 'epoch': 3} {'type': 'loss', 'content': 0.08446735888719559, 'timestamp': '2025-10-01 04:44:43.013524', 'step': 19722, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:43.067532', 'step': 19722, 'epoch': 3} {'type': 'loss', 'content': 0.1153729036450386, 'timestamp': '2025-10-01 04:44:43.069731', 'step': 19723, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:43.123439', 'step': 19723, 'epoch': 3} {'type': 'loss', 'content': 0.14039063453674316, 'timestamp': '2025-10-01 04:44:43.130703', 'step': 19724, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:43.184998', 'step': 19724, 'epoch': 3} {'type': 'loss', 'content': 0.18937484920024872, 'timestamp': '2025-10-01 04:44:43.187076', 'step': 19725, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:43.242227', 'step': 19725, 'epoch': 3} {'type': 'loss', 'content': 0.08698178082704544, 'timestamp': '2025-10-01 04:44:43.244244', 'step': 19726, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:43.298326', 'step': 19726, 'epoch': 3} {'type': 'loss', 'content': 0.040912166237831116, 'timestamp': '2025-10-01 04:44:43.301052', 'step': 19727, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:43.359411', 'step': 19727, 'epoch': 3} {'type': 'loss', 'content': 0.10780198872089386, 'timestamp': '2025-10-01 04:44:43.365495', 'step': 19728, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:43.418461', 'step': 19728, 'epoch': 3} {'type': 'loss', 'content': 0.0710526630282402, 'timestamp': '2025-10-01 04:44:43.435238', 'step': 19729, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:44:43.508488', 'step': 19729, 'epoch': 3} {'type': 'loss', 'content': 0.1414804756641388, 'timestamp': '2025-10-01 04:44:43.511958', 'step': 19730, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:43.581437', 'step': 19730, 'epoch': 3} {'type': 'loss', 'content': 0.03840087354183197, 'timestamp': '2025-10-01 04:44:43.583455', 'step': 19731, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:43.638211', 'step': 19731, 'epoch': 3} {'type': 'loss', 'content': 0.11764959990978241, 'timestamp': '2025-10-01 04:44:43.644123', 'step': 19732, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:43.697070', 'step': 19732, 'epoch': 3} {'type': 'loss', 'content': 0.1413867175579071, 'timestamp': '2025-10-01 04:44:43.699353', 'step': 19733, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:43.752829', 'step': 19733, 'epoch': 3} {'type': 'loss', 'content': 0.08443951606750488, 'timestamp': '2025-10-01 04:44:43.754971', 'step': 19734, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:43.814170', 'step': 19734, 'epoch': 3} {'type': 'loss', 'content': 0.06595015525817871, 'timestamp': '2025-10-01 04:44:43.816363', 'step': 19735, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:43.870750', 'step': 19735, 'epoch': 3} {'type': 'loss', 'content': 0.06654966622591019, 'timestamp': '2025-10-01 04:44:43.876699', 'step': 19736, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:43.930021', 'step': 19736, 'epoch': 3} {'type': 'loss', 'content': 0.055924154818058014, 'timestamp': '2025-10-01 04:44:43.939515', 'step': 19737, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:44.015232', 'step': 19737, 'epoch': 3} {'type': 'loss', 'content': 0.07447577267885208, 'timestamp': '2025-10-01 04:44:44.020345', 'step': 19738, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:44.096135', 'step': 19738, 'epoch': 3} {'type': 'loss', 'content': 0.03776957467198372, 'timestamp': '2025-10-01 04:44:44.098774', 'step': 19739, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:44.153361', 'step': 19739, 'epoch': 3} {'type': 'loss', 'content': 0.058078207075595856, 'timestamp': '2025-10-01 04:44:44.159175', 'step': 19740, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:44.220025', 'step': 19740, 'epoch': 3} {'type': 'loss', 'content': 0.07221005856990814, 'timestamp': '2025-10-01 04:44:44.226843', 'step': 19741, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:44.280743', 'step': 19741, 'epoch': 3} {'type': 'loss', 'content': 0.13892364501953125, 'timestamp': '2025-10-01 04:44:44.283019', 'step': 19742, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:44:44.340053', 'step': 19742, 'epoch': 3} {'type': 'loss', 'content': 0.006905066780745983, 'timestamp': '2025-10-01 04:44:44.342296', 'step': 19743, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:44.406821', 'step': 19743, 'epoch': 3} {'type': 'loss', 'content': 0.04199112206697464, 'timestamp': '2025-10-01 04:44:44.412945', 'step': 19744, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:44.467000', 'step': 19744, 'epoch': 3} {'type': 'loss', 'content': 0.052793875336647034, 'timestamp': '2025-10-01 04:44:44.470110', 'step': 19745, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:44.524132', 'step': 19745, 'epoch': 3} {'type': 'loss', 'content': 0.04865923151373863, 'timestamp': '2025-10-01 04:44:44.526764', 'step': 19746, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:44.582015', 'step': 19746, 'epoch': 3} {'type': 'loss', 'content': 0.06331542134284973, 'timestamp': '2025-10-01 04:44:44.584327', 'step': 19747, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:44.638999', 'step': 19747, 'epoch': 3} {'type': 'loss', 'content': 0.01994386501610279, 'timestamp': '2025-10-01 04:44:44.644898', 'step': 19748, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:44.699104', 'step': 19748, 'epoch': 3} {'type': 'loss', 'content': 0.07516345381736755, 'timestamp': '2025-10-01 04:44:44.701247', 'step': 19749, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:44.755554', 'step': 19749, 'epoch': 3} {'type': 'loss', 'content': 0.08201857656240463, 'timestamp': '2025-10-01 04:44:44.757923', 'step': 19750, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:44.812116', 'step': 19750, 'epoch': 3} {'type': 'loss', 'content': 0.10237134248018265, 'timestamp': '2025-10-01 04:44:44.823414', 'step': 19751, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:44.878156', 'step': 19751, 'epoch': 3} {'type': 'loss', 'content': 0.1045551598072052, 'timestamp': '2025-10-01 04:44:44.884151', 'step': 19752, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:44:44.938490', 'step': 19752, 'epoch': 3} {'type': 'loss', 'content': 0.0324118509888649, 'timestamp': '2025-10-01 04:44:44.946775', 'step': 19753, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:45.002274', 'step': 19753, 'epoch': 3} {'type': 'loss', 'content': 0.12229501456022263, 'timestamp': '2025-10-01 04:44:45.004433', 'step': 19754, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:45.058936', 'step': 19754, 'epoch': 3} {'type': 'loss', 'content': 0.030588824301958084, 'timestamp': '2025-10-01 04:44:45.061322', 'step': 19755, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:45.118859', 'step': 19755, 'epoch': 3} {'type': 'loss', 'content': 0.11476235091686249, 'timestamp': '2025-10-01 04:44:45.125034', 'step': 19756, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:45.178495', 'step': 19756, 'epoch': 3} {'type': 'loss', 'content': 0.016607539728283882, 'timestamp': '2025-10-01 04:44:45.181137', 'step': 19757, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:45.235401', 'step': 19757, 'epoch': 3} {'type': 'loss', 'content': 0.06322768330574036, 'timestamp': '2025-10-01 04:44:45.237825', 'step': 19758, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:45.292567', 'step': 19758, 'epoch': 3} {'type': 'loss', 'content': 0.18695960938930511, 'timestamp': '2025-10-01 04:44:45.295027', 'step': 19759, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:45.351945', 'step': 19759, 'epoch': 3} {'type': 'loss', 'content': 0.04810907319188118, 'timestamp': '2025-10-01 04:44:45.358124', 'step': 19760, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:45.419985', 'step': 19760, 'epoch': 3} {'type': 'loss', 'content': 0.07169586420059204, 'timestamp': '2025-10-01 04:44:45.422575', 'step': 19761, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:45.476521', 'step': 19761, 'epoch': 3} {'type': 'loss', 'content': 0.07841018587350845, 'timestamp': '2025-10-01 04:44:45.479034', 'step': 19762, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:45.534068', 'step': 19762, 'epoch': 3} {'type': 'loss', 'content': 0.1592492014169693, 'timestamp': '2025-10-01 04:44:45.536536', 'step': 19763, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:45.590950', 'step': 19763, 'epoch': 3} {'type': 'loss', 'content': 0.08731188625097275, 'timestamp': '2025-10-01 04:44:45.597447', 'step': 19764, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:45.650149', 'step': 19764, 'epoch': 3} {'type': 'loss', 'content': 0.06258594989776611, 'timestamp': '2025-10-01 04:44:45.652429', 'step': 19765, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:45.706436', 'step': 19765, 'epoch': 3} {'type': 'loss', 'content': 0.04476908966898918, 'timestamp': '2025-10-01 04:44:45.718082', 'step': 19766, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:45.772526', 'step': 19766, 'epoch': 3} {'type': 'loss', 'content': 0.127130389213562, 'timestamp': '2025-10-01 04:44:45.774914', 'step': 19767, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:45.827924', 'step': 19767, 'epoch': 3} {'type': 'loss', 'content': 0.10366944223642349, 'timestamp': '2025-10-01 04:44:45.833582', 'step': 19768, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:45.888254', 'step': 19768, 'epoch': 3} {'type': 'loss', 'content': 0.10867122560739517, 'timestamp': '2025-10-01 04:44:45.893449', 'step': 19769, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:45.946696', 'step': 19769, 'epoch': 3} {'type': 'loss', 'content': 0.10895873606204987, 'timestamp': '2025-10-01 04:44:45.949011', 'step': 19770, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:46.003073', 'step': 19770, 'epoch': 3} {'type': 'loss', 'content': 0.08179180324077606, 'timestamp': '2025-10-01 04:44:46.005102', 'step': 19771, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:46.071385', 'step': 19771, 'epoch': 3} {'type': 'loss', 'content': 0.09481231123209, 'timestamp': '2025-10-01 04:44:46.078084', 'step': 19772, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:46.139273', 'step': 19772, 'epoch': 3} {'type': 'loss', 'content': 0.03180721402168274, 'timestamp': '2025-10-01 04:44:46.141865', 'step': 19773, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:46.195433', 'step': 19773, 'epoch': 3} {'type': 'loss', 'content': 0.06364531069993973, 'timestamp': '2025-10-01 04:44:46.197572', 'step': 19774, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:46.252628', 'step': 19774, 'epoch': 3} {'type': 'loss', 'content': 0.08182942122220993, 'timestamp': '2025-10-01 04:44:46.254720', 'step': 19775, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:46.308397', 'step': 19775, 'epoch': 3} {'type': 'loss', 'content': 0.1312832236289978, 'timestamp': '2025-10-01 04:44:46.314173', 'step': 19776, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:44:46.366946', 'step': 19776, 'epoch': 3} {'type': 'loss', 'content': 0.04283100739121437, 'timestamp': '2025-10-01 04:44:46.369047', 'step': 19777, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:46.422340', 'step': 19777, 'epoch': 3} {'type': 'loss', 'content': 0.15141236782073975, 'timestamp': '2025-10-01 04:44:46.424425', 'step': 19778, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:46.478523', 'step': 19778, 'epoch': 3} {'type': 'loss', 'content': 0.1091424748301506, 'timestamp': '2025-10-01 04:44:46.480602', 'step': 19779, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:46.534333', 'step': 19779, 'epoch': 3} {'type': 'loss', 'content': 0.04421977698802948, 'timestamp': '2025-10-01 04:44:46.540147', 'step': 19780, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:46.593125', 'step': 19780, 'epoch': 3} {'type': 'loss', 'content': 0.09617582708597183, 'timestamp': '2025-10-01 04:44:46.595454', 'step': 19781, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:46.650750', 'step': 19781, 'epoch': 3} {'type': 'loss', 'content': 0.06902210414409637, 'timestamp': '2025-10-01 04:44:46.652846', 'step': 19782, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:46.706195', 'step': 19782, 'epoch': 3} {'type': 'loss', 'content': 0.11773575842380524, 'timestamp': '2025-10-01 04:44:46.708241', 'step': 19783, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:46.761474', 'step': 19783, 'epoch': 3} {'type': 'loss', 'content': 0.06544627994298935, 'timestamp': '2025-10-01 04:44:46.767156', 'step': 19784, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:46.819663', 'step': 19784, 'epoch': 3} {'type': 'loss', 'content': 0.03610706329345703, 'timestamp': '2025-10-01 04:44:46.821775', 'step': 19785, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:44:46.875388', 'step': 19785, 'epoch': 3} {'type': 'loss', 'content': 0.10219351202249527, 'timestamp': '2025-10-01 04:44:46.877673', 'step': 19786, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:46.931196', 'step': 19786, 'epoch': 3} {'type': 'loss', 'content': 0.09172194451093674, 'timestamp': '2025-10-01 04:44:46.933261', 'step': 19787, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:46.986131', 'step': 19787, 'epoch': 3} {'type': 'loss', 'content': 0.11406081914901733, 'timestamp': '2025-10-01 04:44:46.991810', 'step': 19788, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:47.045119', 'step': 19788, 'epoch': 3} {'type': 'loss', 'content': 0.2001192271709442, 'timestamp': '2025-10-01 04:44:47.047202', 'step': 19789, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:47.100911', 'step': 19789, 'epoch': 3} {'type': 'loss', 'content': 0.10183939337730408, 'timestamp': '2025-10-01 04:44:47.103506', 'step': 19790, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:47.157507', 'step': 19790, 'epoch': 3} {'type': 'loss', 'content': 0.08021321147680283, 'timestamp': '2025-10-01 04:44:47.159618', 'step': 19791, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:47.213094', 'step': 19791, 'epoch': 3} {'type': 'loss', 'content': 0.04009821265935898, 'timestamp': '2025-10-01 04:44:47.219544', 'step': 19792, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:47.272900', 'step': 19792, 'epoch': 3} {'type': 'loss', 'content': 0.060812827199697495, 'timestamp': '2025-10-01 04:44:47.275174', 'step': 19793, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:47.329301', 'step': 19793, 'epoch': 3} {'type': 'loss', 'content': 0.06700310111045837, 'timestamp': '2025-10-01 04:44:47.331599', 'step': 19794, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:47.385485', 'step': 19794, 'epoch': 3} {'type': 'loss', 'content': 0.07747484743595123, 'timestamp': '2025-10-01 04:44:47.387766', 'step': 19795, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:47.473329', 'step': 19795, 'epoch': 3} {'type': 'loss', 'content': 0.07074052840471268, 'timestamp': '2025-10-01 04:44:47.479146', 'step': 19796, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:47.548399', 'step': 19796, 'epoch': 3} {'type': 'loss', 'content': 0.13336840271949768, 'timestamp': '2025-10-01 04:44:47.550478', 'step': 19797, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:47.603923', 'step': 19797, 'epoch': 3} {'type': 'loss', 'content': 0.06209355965256691, 'timestamp': '2025-10-01 04:44:47.606111', 'step': 19798, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:47.659817', 'step': 19798, 'epoch': 3} {'type': 'loss', 'content': 0.14294841885566711, 'timestamp': '2025-10-01 04:44:47.661986', 'step': 19799, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:47.715550', 'step': 19799, 'epoch': 3} {'type': 'loss', 'content': 0.10004397481679916, 'timestamp': '2025-10-01 04:44:47.721308', 'step': 19800, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:47.773702', 'step': 19800, 'epoch': 3} {'type': 'loss', 'content': 0.14333036541938782, 'timestamp': '2025-10-01 04:44:47.780239', 'step': 19801, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:47.833348', 'step': 19801, 'epoch': 3} {'type': 'loss', 'content': 0.08882081508636475, 'timestamp': '2025-10-01 04:44:47.835822', 'step': 19802, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:47.889800', 'step': 19802, 'epoch': 3} {'type': 'loss', 'content': 0.07726684212684631, 'timestamp': '2025-10-01 04:44:47.891947', 'step': 19803, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:47.945248', 'step': 19803, 'epoch': 3} {'type': 'loss', 'content': 0.07211887836456299, 'timestamp': '2025-10-01 04:44:47.950877', 'step': 19804, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:48.010068', 'step': 19804, 'epoch': 3} {'type': 'loss', 'content': 0.08858484029769897, 'timestamp': '2025-10-01 04:44:48.012129', 'step': 19805, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:48.066235', 'step': 19805, 'epoch': 3} {'type': 'loss', 'content': 0.11778578162193298, 'timestamp': '2025-10-01 04:44:48.070052', 'step': 19806, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:48.127893', 'step': 19806, 'epoch': 3} {'type': 'loss', 'content': 0.11849617958068848, 'timestamp': '2025-10-01 04:44:48.131416', 'step': 19807, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:48.198828', 'step': 19807, 'epoch': 3} {'type': 'loss', 'content': 0.07221173495054245, 'timestamp': '2025-10-01 04:44:48.204615', 'step': 19808, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:48.258415', 'step': 19808, 'epoch': 3} {'type': 'loss', 'content': 0.11906612664461136, 'timestamp': '2025-10-01 04:44:48.260610', 'step': 19809, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:48.314465', 'step': 19809, 'epoch': 3} {'type': 'loss', 'content': 0.10462049394845963, 'timestamp': '2025-10-01 04:44:48.316554', 'step': 19810, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:48.371284', 'step': 19810, 'epoch': 3} {'type': 'loss', 'content': 0.15207839012145996, 'timestamp': '2025-10-01 04:44:48.373395', 'step': 19811, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:48.427131', 'step': 19811, 'epoch': 3} {'type': 'loss', 'content': 0.050995249301195145, 'timestamp': '2025-10-01 04:44:48.433549', 'step': 19812, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:48.501553', 'step': 19812, 'epoch': 3} {'type': 'loss', 'content': 0.1436808556318283, 'timestamp': '2025-10-01 04:44:48.503785', 'step': 19813, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:48.557564', 'step': 19813, 'epoch': 3} {'type': 'loss', 'content': 0.11966808885335922, 'timestamp': '2025-10-01 04:44:48.567770', 'step': 19814, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:48.641091', 'step': 19814, 'epoch': 3} {'type': 'loss', 'content': 0.13992957770824432, 'timestamp': '2025-10-01 04:44:48.643192', 'step': 19815, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:48.698247', 'step': 19815, 'epoch': 3} {'type': 'loss', 'content': 0.049319177865982056, 'timestamp': '2025-10-01 04:44:48.703965', 'step': 19816, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:44:48.758875', 'step': 19816, 'epoch': 3} {'type': 'loss', 'content': 0.08109907060861588, 'timestamp': '2025-10-01 04:44:48.761015', 'step': 19817, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:48.814789', 'step': 19817, 'epoch': 3} {'type': 'loss', 'content': 0.050435978919267654, 'timestamp': '2025-10-01 04:44:48.816877', 'step': 19818, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:48.870575', 'step': 19818, 'epoch': 3} {'type': 'loss', 'content': 0.14655855298042297, 'timestamp': '2025-10-01 04:44:48.877760', 'step': 19819, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:48.942256', 'step': 19819, 'epoch': 3} {'type': 'loss', 'content': 0.1553768664598465, 'timestamp': '2025-10-01 04:44:48.951863', 'step': 19820, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:49.004909', 'step': 19820, 'epoch': 3} {'type': 'loss', 'content': 0.05005713552236557, 'timestamp': '2025-10-01 04:44:49.007314', 'step': 19821, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:49.062781', 'step': 19821, 'epoch': 3} {'type': 'loss', 'content': 0.11812657862901688, 'timestamp': '2025-10-01 04:44:49.064879', 'step': 19822, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:49.118246', 'step': 19822, 'epoch': 3} {'type': 'loss', 'content': 0.05863591656088829, 'timestamp': '2025-10-01 04:44:49.120602', 'step': 19823, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:49.174244', 'step': 19823, 'epoch': 3} {'type': 'loss', 'content': 0.02425490878522396, 'timestamp': '2025-10-01 04:44:49.180026', 'step': 19824, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:49.232679', 'step': 19824, 'epoch': 3} {'type': 'loss', 'content': 0.0625426173210144, 'timestamp': '2025-10-01 04:44:49.234973', 'step': 19825, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:49.288032', 'step': 19825, 'epoch': 3} {'type': 'loss', 'content': 0.08661842346191406, 'timestamp': '2025-10-01 04:44:49.290088', 'step': 19826, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:49.343595', 'step': 19826, 'epoch': 3} {'type': 'loss', 'content': 0.09707364439964294, 'timestamp': '2025-10-01 04:44:49.345645', 'step': 19827, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:49.399240', 'step': 19827, 'epoch': 3} {'type': 'loss', 'content': 0.10291565209627151, 'timestamp': '2025-10-01 04:44:49.404922', 'step': 19828, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:49.457862', 'step': 19828, 'epoch': 3} {'type': 'loss', 'content': 0.04753740876913071, 'timestamp': '2025-10-01 04:44:49.460071', 'step': 19829, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:49.517493', 'step': 19829, 'epoch': 3} {'type': 'loss', 'content': 0.06527866423130035, 'timestamp': '2025-10-01 04:44:49.519628', 'step': 19830, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:49.581585', 'step': 19830, 'epoch': 3} {'type': 'loss', 'content': 0.12346875667572021, 'timestamp': '2025-10-01 04:44:49.583745', 'step': 19831, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:49.637309', 'step': 19831, 'epoch': 3} {'type': 'loss', 'content': 0.13186873495578766, 'timestamp': '2025-10-01 04:44:49.642978', 'step': 19832, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:49.697400', 'step': 19832, 'epoch': 3} {'type': 'loss', 'content': 0.050527725368738174, 'timestamp': '2025-10-01 04:44:49.699502', 'step': 19833, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:49.752858', 'step': 19833, 'epoch': 3} {'type': 'loss', 'content': 0.21233977377414703, 'timestamp': '2025-10-01 04:44:49.755542', 'step': 19834, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:49.809898', 'step': 19834, 'epoch': 3} {'type': 'loss', 'content': 0.07180396467447281, 'timestamp': '2025-10-01 04:44:49.811975', 'step': 19835, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:49.865310', 'step': 19835, 'epoch': 3} {'type': 'loss', 'content': 0.08623682707548141, 'timestamp': '2025-10-01 04:44:49.871047', 'step': 19836, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:49.923575', 'step': 19836, 'epoch': 3} {'type': 'loss', 'content': 0.12750037014484406, 'timestamp': '2025-10-01 04:44:49.925899', 'step': 19837, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:49.979035', 'step': 19837, 'epoch': 3} {'type': 'loss', 'content': 0.07413723319768906, 'timestamp': '2025-10-01 04:44:49.981349', 'step': 19838, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:50.034845', 'step': 19838, 'epoch': 3} {'type': 'loss', 'content': 0.0514666773378849, 'timestamp': '2025-10-01 04:44:50.037171', 'step': 19839, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:50.090416', 'step': 19839, 'epoch': 3} {'type': 'loss', 'content': 0.08898159116506577, 'timestamp': '2025-10-01 04:44:50.096211', 'step': 19840, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:50.149043', 'step': 19840, 'epoch': 3} {'type': 'loss', 'content': 0.06730460375547409, 'timestamp': '2025-10-01 04:44:50.158729', 'step': 19841, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:50.212923', 'step': 19841, 'epoch': 3} {'type': 'loss', 'content': 0.04272634536027908, 'timestamp': '2025-10-01 04:44:50.215292', 'step': 19842, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:50.291125', 'step': 19842, 'epoch': 3} {'type': 'loss', 'content': 0.09850271791219711, 'timestamp': '2025-10-01 04:44:50.293279', 'step': 19843, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:50.346342', 'step': 19843, 'epoch': 3} {'type': 'loss', 'content': 0.0563495047390461, 'timestamp': '2025-10-01 04:44:50.352049', 'step': 19844, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:50.411104', 'step': 19844, 'epoch': 3} {'type': 'loss', 'content': 0.0780646950006485, 'timestamp': '2025-10-01 04:44:50.413327', 'step': 19845, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:50.468742', 'step': 19845, 'epoch': 3} {'type': 'loss', 'content': 0.07008214294910431, 'timestamp': '2025-10-01 04:44:50.470779', 'step': 19846, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:50.524169', 'step': 19846, 'epoch': 3} {'type': 'loss', 'content': 0.21037113666534424, 'timestamp': '2025-10-01 04:44:50.526592', 'step': 19847, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:50.579656', 'step': 19847, 'epoch': 3} {'type': 'loss', 'content': 0.07949984073638916, 'timestamp': '2025-10-01 04:44:50.588192', 'step': 19848, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:50.644011', 'step': 19848, 'epoch': 3} {'type': 'loss', 'content': 0.11090198159217834, 'timestamp': '2025-10-01 04:44:50.646107', 'step': 19849, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:50.712012', 'step': 19849, 'epoch': 3} {'type': 'loss', 'content': 0.08832970261573792, 'timestamp': '2025-10-01 04:44:50.714076', 'step': 19850, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:50.767412', 'step': 19850, 'epoch': 3} {'type': 'loss', 'content': 0.07814134657382965, 'timestamp': '2025-10-01 04:44:50.774083', 'step': 19851, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:50.828409', 'step': 19851, 'epoch': 3} {'type': 'loss', 'content': 0.10784703493118286, 'timestamp': '2025-10-01 04:44:50.834429', 'step': 19852, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:50.888216', 'step': 19852, 'epoch': 3} {'type': 'loss', 'content': 0.03715066611766815, 'timestamp': '2025-10-01 04:44:50.890326', 'step': 19853, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:50.943374', 'step': 19853, 'epoch': 3} {'type': 'loss', 'content': 0.1316739171743393, 'timestamp': '2025-10-01 04:44:50.947736', 'step': 19854, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:51.001735', 'step': 19854, 'epoch': 3} {'type': 'loss', 'content': 0.08061297982931137, 'timestamp': '2025-10-01 04:44:51.004229', 'step': 19855, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:51.057750', 'step': 19855, 'epoch': 3} {'type': 'loss', 'content': 0.08128266781568527, 'timestamp': '2025-10-01 04:44:51.063422', 'step': 19856, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:51.116903', 'step': 19856, 'epoch': 3} {'type': 'loss', 'content': 0.09982921928167343, 'timestamp': '2025-10-01 04:44:51.119296', 'step': 19857, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:51.173037', 'step': 19857, 'epoch': 3} {'type': 'loss', 'content': 0.0949971154332161, 'timestamp': '2025-10-01 04:44:51.192089', 'step': 19858, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:51.246885', 'step': 19858, 'epoch': 3} {'type': 'loss', 'content': 0.10155792534351349, 'timestamp': '2025-10-01 04:44:51.249032', 'step': 19859, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:51.302543', 'step': 19859, 'epoch': 3} {'type': 'loss', 'content': 0.07644723355770111, 'timestamp': '2025-10-01 04:44:51.308435', 'step': 19860, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:51.364531', 'step': 19860, 'epoch': 3} {'type': 'loss', 'content': 0.09016259014606476, 'timestamp': '2025-10-01 04:44:51.368955', 'step': 19861, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:51.423731', 'step': 19861, 'epoch': 3} {'type': 'loss', 'content': 0.11441630125045776, 'timestamp': '2025-10-01 04:44:51.425959', 'step': 19862, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:51.487030', 'step': 19862, 'epoch': 3} {'type': 'loss', 'content': 0.04596053063869476, 'timestamp': '2025-10-01 04:44:51.489281', 'step': 19863, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:51.542683', 'step': 19863, 'epoch': 3} {'type': 'loss', 'content': 0.19531863927841187, 'timestamp': '2025-10-01 04:44:51.548346', 'step': 19864, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:51.601625', 'step': 19864, 'epoch': 3} {'type': 'loss', 'content': 0.1203942820429802, 'timestamp': '2025-10-01 04:44:51.603629', 'step': 19865, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:44:51.657259', 'step': 19865, 'epoch': 3} {'type': 'loss', 'content': 0.16688628494739532, 'timestamp': '2025-10-01 04:44:51.659599', 'step': 19866, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:51.713535', 'step': 19866, 'epoch': 3} {'type': 'loss', 'content': 0.12565942108631134, 'timestamp': '2025-10-01 04:44:51.715827', 'step': 19867, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:51.768985', 'step': 19867, 'epoch': 3} {'type': 'loss', 'content': 0.05077708512544632, 'timestamp': '2025-10-01 04:44:51.774711', 'step': 19868, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:51.828058', 'step': 19868, 'epoch': 3} {'type': 'loss', 'content': 0.13271255791187286, 'timestamp': '2025-10-01 04:44:51.830097', 'step': 19869, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:51.882967', 'step': 19869, 'epoch': 3} {'type': 'loss', 'content': 0.07060713320970535, 'timestamp': '2025-10-01 04:44:51.885084', 'step': 19870, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:51.939359', 'step': 19870, 'epoch': 3} {'type': 'loss', 'content': 0.050060342997312546, 'timestamp': '2025-10-01 04:44:51.941540', 'step': 19871, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:52.006882', 'step': 19871, 'epoch': 3} {'type': 'loss', 'content': 0.10155151784420013, 'timestamp': '2025-10-01 04:44:52.012629', 'step': 19872, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:52.065574', 'step': 19872, 'epoch': 3} {'type': 'loss', 'content': 0.09857810288667679, 'timestamp': '2025-10-01 04:44:52.067804', 'step': 19873, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:52.121201', 'step': 19873, 'epoch': 3} {'type': 'loss', 'content': 0.06710780411958694, 'timestamp': '2025-10-01 04:44:52.123267', 'step': 19874, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:52.181679', 'step': 19874, 'epoch': 3} {'type': 'loss', 'content': 0.08348323404788971, 'timestamp': '2025-10-01 04:44:52.184002', 'step': 19875, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:52.238304', 'step': 19875, 'epoch': 3} {'type': 'loss', 'content': 0.08380231261253357, 'timestamp': '2025-10-01 04:44:52.247124', 'step': 19876, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:44:52.299988', 'step': 19876, 'epoch': 3} {'type': 'loss', 'content': 0.10216102004051208, 'timestamp': '2025-10-01 04:44:52.302113', 'step': 19877, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:52.355654', 'step': 19877, 'epoch': 3} {'type': 'loss', 'content': 0.08918212354183197, 'timestamp': '2025-10-01 04:44:52.357790', 'step': 19878, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:52.411285', 'step': 19878, 'epoch': 3} {'type': 'loss', 'content': 0.10804760456085205, 'timestamp': '2025-10-01 04:44:52.413496', 'step': 19879, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:52.466889', 'step': 19879, 'epoch': 3} {'type': 'loss', 'content': 0.08431963622570038, 'timestamp': '2025-10-01 04:44:52.474245', 'step': 19880, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:52.527601', 'step': 19880, 'epoch': 3} {'type': 'loss', 'content': 0.0904201865196228, 'timestamp': '2025-10-01 04:44:52.530847', 'step': 19881, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:52.584365', 'step': 19881, 'epoch': 3} {'type': 'loss', 'content': 0.14180563390254974, 'timestamp': '2025-10-01 04:44:52.586464', 'step': 19882, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:52.654051', 'step': 19882, 'epoch': 3} {'type': 'loss', 'content': 0.0526110902428627, 'timestamp': '2025-10-01 04:44:52.656319', 'step': 19883, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:52.709353', 'step': 19883, 'epoch': 3} {'type': 'loss', 'content': 0.08447841554880142, 'timestamp': '2025-10-01 04:44:52.715090', 'step': 19884, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:52.784861', 'step': 19884, 'epoch': 3} {'type': 'loss', 'content': 0.06345880031585693, 'timestamp': '2025-10-01 04:44:52.787132', 'step': 19885, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:52.840835', 'step': 19885, 'epoch': 3} {'type': 'loss', 'content': 0.1326855719089508, 'timestamp': '2025-10-01 04:44:52.852494', 'step': 19886, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:52.914968', 'step': 19886, 'epoch': 3} {'type': 'loss', 'content': 0.07503993809223175, 'timestamp': '2025-10-01 04:44:52.916990', 'step': 19887, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:52.970150', 'step': 19887, 'epoch': 3} {'type': 'loss', 'content': 0.02625754475593567, 'timestamp': '2025-10-01 04:44:52.975788', 'step': 19888, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:44:53.028573', 'step': 19888, 'epoch': 3} {'type': 'loss', 'content': 0.1767388880252838, 'timestamp': '2025-10-01 04:44:53.031076', 'step': 19889, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:53.085728', 'step': 19889, 'epoch': 3} {'type': 'loss', 'content': 0.10157745331525803, 'timestamp': '2025-10-01 04:44:53.088167', 'step': 19890, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:53.143526', 'step': 19890, 'epoch': 3} {'type': 'loss', 'content': 0.04947729781270027, 'timestamp': '2025-10-01 04:44:53.146032', 'step': 19891, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:53.210551', 'step': 19891, 'epoch': 3} {'type': 'loss', 'content': 0.014280445873737335, 'timestamp': '2025-10-01 04:44:53.216640', 'step': 19892, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:53.270805', 'step': 19892, 'epoch': 3} {'type': 'loss', 'content': 0.10695937275886536, 'timestamp': '2025-10-01 04:44:53.273351', 'step': 19893, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:53.328250', 'step': 19893, 'epoch': 3} {'type': 'loss', 'content': 0.15627621114253998, 'timestamp': '2025-10-01 04:44:53.330881', 'step': 19894, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:53.385436', 'step': 19894, 'epoch': 3} {'type': 'loss', 'content': 0.0515807569026947, 'timestamp': '2025-10-01 04:44:53.388105', 'step': 19895, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:44:53.445377', 'step': 19895, 'epoch': 3} {'type': 'loss', 'content': 0.08585264533758163, 'timestamp': '2025-10-01 04:44:53.451422', 'step': 19896, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:53.506368', 'step': 19896, 'epoch': 3} {'type': 'loss', 'content': 0.10327999293804169, 'timestamp': '2025-10-01 04:44:53.509488', 'step': 19897, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:44:53.564130', 'step': 19897, 'epoch': 3} {'type': 'loss', 'content': 0.07910678535699844, 'timestamp': '2025-10-01 04:44:53.566472', 'step': 19898, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:53.621181', 'step': 19898, 'epoch': 3} {'type': 'loss', 'content': 0.14409516751766205, 'timestamp': '2025-10-01 04:44:53.623484', 'step': 19899, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:53.676953', 'step': 19899, 'epoch': 3} {'type': 'loss', 'content': 0.10317132622003555, 'timestamp': '2025-10-01 04:44:53.683305', 'step': 19900, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:53.748581', 'step': 19900, 'epoch': 3} {'type': 'loss', 'content': 0.10318329930305481, 'timestamp': '2025-10-01 04:44:53.750788', 'step': 19901, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:44:53.806941', 'step': 19901, 'epoch': 3} {'type': 'loss', 'content': 0.06945359706878662, 'timestamp': '2025-10-01 04:44:53.809278', 'step': 19902, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:53.863316', 'step': 19902, 'epoch': 3} {'type': 'loss', 'content': 0.04907083138823509, 'timestamp': '2025-10-01 04:44:53.865625', 'step': 19903, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:53.932002', 'step': 19903, 'epoch': 3} {'type': 'loss', 'content': 0.09713799506425858, 'timestamp': '2025-10-01 04:44:53.937915', 'step': 19904, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:54.006016', 'step': 19904, 'epoch': 3} {'type': 'loss', 'content': 0.1401423215866089, 'timestamp': '2025-10-01 04:44:54.008366', 'step': 19905, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:54.062543', 'step': 19905, 'epoch': 3} {'type': 'loss', 'content': 0.1178094670176506, 'timestamp': '2025-10-01 04:44:54.064860', 'step': 19906, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:44:54.118976', 'step': 19906, 'epoch': 3} {'type': 'loss', 'content': 0.10043340921401978, 'timestamp': '2025-10-01 04:44:54.122175', 'step': 19907, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:54.177002', 'step': 19907, 'epoch': 3} {'type': 'loss', 'content': 0.09839606285095215, 'timestamp': '2025-10-01 04:44:54.186683', 'step': 19908, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:54.240838', 'step': 19908, 'epoch': 3} {'type': 'loss', 'content': 0.051368311047554016, 'timestamp': '2025-10-01 04:44:54.243437', 'step': 19909, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:54.297789', 'step': 19909, 'epoch': 3} {'type': 'loss', 'content': 0.07687103003263474, 'timestamp': '2025-10-01 04:44:54.300275', 'step': 19910, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:54.354662', 'step': 19910, 'epoch': 3} {'type': 'loss', 'content': 0.1788373440504074, 'timestamp': '2025-10-01 04:44:54.356835', 'step': 19911, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:44:54.410543', 'step': 19911, 'epoch': 3} {'type': 'loss', 'content': 0.12671060860157013, 'timestamp': '2025-10-01 04:44:54.416102', 'step': 19912, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:54.469938', 'step': 19912, 'epoch': 3} {'type': 'loss', 'content': 0.06326296925544739, 'timestamp': '2025-10-01 04:44:54.472374', 'step': 19913, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:54.531439', 'step': 19913, 'epoch': 3} {'type': 'loss', 'content': 0.19791176915168762, 'timestamp': '2025-10-01 04:44:54.533888', 'step': 19914, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:54.588402', 'step': 19914, 'epoch': 3} {'type': 'loss', 'content': 0.11123418807983398, 'timestamp': '2025-10-01 04:44:54.590516', 'step': 19915, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:54.651237', 'step': 19915, 'epoch': 3} {'type': 'loss', 'content': 0.07961703836917877, 'timestamp': '2025-10-01 04:44:54.656965', 'step': 19916, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:54.721300', 'step': 19916, 'epoch': 3} {'type': 'loss', 'content': 0.07952340692281723, 'timestamp': '2025-10-01 04:44:54.724073', 'step': 19917, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:54.778277', 'step': 19917, 'epoch': 3} {'type': 'loss', 'content': 0.1011878177523613, 'timestamp': '2025-10-01 04:44:54.780309', 'step': 19918, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:54.834210', 'step': 19918, 'epoch': 3} {'type': 'loss', 'content': 0.1511719673871994, 'timestamp': '2025-10-01 04:44:54.836378', 'step': 19919, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:44:54.889880', 'step': 19919, 'epoch': 3} {'type': 'loss', 'content': 0.05418141186237335, 'timestamp': '2025-10-01 04:44:54.896515', 'step': 19920, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:54.951455', 'step': 19920, 'epoch': 3} {'type': 'loss', 'content': 0.10439714044332504, 'timestamp': '2025-10-01 04:44:54.953780', 'step': 19921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:55.007114', 'step': 19921, 'epoch': 3} {'type': 'loss', 'content': 0.07605624943971634, 'timestamp': '2025-10-01 04:44:55.009311', 'step': 19922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:55.062543', 'step': 19922, 'epoch': 3} {'type': 'loss', 'content': 0.12063470482826233, 'timestamp': '2025-10-01 04:44:55.064978', 'step': 19923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:55.122656', 'step': 19923, 'epoch': 3} {'type': 'loss', 'content': 0.0803029015660286, 'timestamp': '2025-10-01 04:44:55.128476', 'step': 19924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:55.180995', 'step': 19924, 'epoch': 3} {'type': 'loss', 'content': 0.05220566317439079, 'timestamp': '2025-10-01 04:44:55.183011', 'step': 19925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:55.235452', 'step': 19925, 'epoch': 3} {'type': 'loss', 'content': 0.07394649088382721, 'timestamp': '2025-10-01 04:44:55.237669', 'step': 19926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:55.290682', 'step': 19926, 'epoch': 3} {'type': 'loss', 'content': 0.10654178261756897, 'timestamp': '2025-10-01 04:44:55.292836', 'step': 19927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:55.345793', 'step': 19927, 'epoch': 3} {'type': 'loss', 'content': 0.07214139401912689, 'timestamp': '2025-10-01 04:44:55.351437', 'step': 19928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:44:55.403864', 'step': 19928, 'epoch': 3} {'type': 'loss', 'content': 0.11993352323770523, 'timestamp': '2025-10-01 04:44:55.407010', 'step': 19929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:55.460855', 'step': 19929, 'epoch': 3} {'type': 'loss', 'content': 0.11728055775165558, 'timestamp': '2025-10-01 04:44:55.462978', 'step': 19930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:55.516623', 'step': 19930, 'epoch': 3} {'type': 'loss', 'content': 0.1471366137266159, 'timestamp': '2025-10-01 04:44:55.518665', 'step': 19931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:55.572176', 'step': 19931, 'epoch': 3} {'type': 'loss', 'content': 0.06825197488069534, 'timestamp': '2025-10-01 04:44:55.577756', 'step': 19932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:55.629942', 'step': 19932, 'epoch': 3} {'type': 'loss', 'content': 0.11522690206766129, 'timestamp': '2025-10-01 04:44:55.632139', 'step': 19933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:55.685144', 'step': 19933, 'epoch': 3} {'type': 'loss', 'content': 0.10931435227394104, 'timestamp': '2025-10-01 04:44:55.689414', 'step': 19934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:55.743167', 'step': 19934, 'epoch': 3} {'type': 'loss', 'content': 0.04266217350959778, 'timestamp': '2025-10-01 04:44:55.745332', 'step': 19935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:55.799587', 'step': 19935, 'epoch': 3} {'type': 'loss', 'content': 0.09912502020597458, 'timestamp': '2025-10-01 04:44:55.805454', 'step': 19936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:55.858392', 'step': 19936, 'epoch': 3} {'type': 'loss', 'content': 0.12936514616012573, 'timestamp': '2025-10-01 04:44:55.860673', 'step': 19937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:55.915423', 'step': 19937, 'epoch': 3} {'type': 'loss', 'content': 0.12041684240102768, 'timestamp': '2025-10-01 04:44:55.917699', 'step': 19938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:55.972586', 'step': 19938, 'epoch': 3} {'type': 'loss', 'content': 0.09778602421283722, 'timestamp': '2025-10-01 04:44:55.974849', 'step': 19939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:56.029446', 'step': 19939, 'epoch': 3} {'type': 'loss', 'content': 0.09546402841806412, 'timestamp': '2025-10-01 04:44:56.035732', 'step': 19940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:56.096755', 'step': 19940, 'epoch': 3} {'type': 'loss', 'content': 0.02521367184817791, 'timestamp': '2025-10-01 04:44:56.103761', 'step': 19941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:56.159262', 'step': 19941, 'epoch': 3} {'type': 'loss', 'content': 0.09837370365858078, 'timestamp': '2025-10-01 04:44:56.161415', 'step': 19942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:56.219423', 'step': 19942, 'epoch': 3} {'type': 'loss', 'content': 0.07444304972887039, 'timestamp': '2025-10-01 04:44:56.221915', 'step': 19943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:56.278717', 'step': 19943, 'epoch': 3} {'type': 'loss', 'content': 0.09814857691526413, 'timestamp': '2025-10-01 04:44:56.285015', 'step': 19944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:56.338443', 'step': 19944, 'epoch': 3} {'type': 'loss', 'content': 0.10292885452508926, 'timestamp': '2025-10-01 04:44:56.341092', 'step': 19945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:44:56.397839', 'step': 19945, 'epoch': 3} {'type': 'loss', 'content': 0.08234903961420059, 'timestamp': '2025-10-01 04:44:56.400029', 'step': 19946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:56.454582', 'step': 19946, 'epoch': 3} {'type': 'loss', 'content': 0.18027719855308533, 'timestamp': '2025-10-01 04:44:56.456789', 'step': 19947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:56.517190', 'step': 19947, 'epoch': 3} {'type': 'loss', 'content': 0.10452389717102051, 'timestamp': '2025-10-01 04:44:56.523475', 'step': 19948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:56.577022', 'step': 19948, 'epoch': 3} {'type': 'loss', 'content': 0.11141929030418396, 'timestamp': '2025-10-01 04:44:56.579361', 'step': 19949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:56.632835', 'step': 19949, 'epoch': 3} {'type': 'loss', 'content': 0.08192315697669983, 'timestamp': '2025-10-01 04:44:56.635016', 'step': 19950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:56.688228', 'step': 19950, 'epoch': 3} {'type': 'loss', 'content': 0.08431211858987808, 'timestamp': '2025-10-01 04:44:56.690367', 'step': 19951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:56.744334', 'step': 19951, 'epoch': 3} {'type': 'loss', 'content': 0.09459897130727768, 'timestamp': '2025-10-01 04:44:56.751508', 'step': 19952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:56.812004', 'step': 19952, 'epoch': 3} {'type': 'loss', 'content': 0.10165993869304657, 'timestamp': '2025-10-01 04:44:56.814325', 'step': 19953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:56.868041', 'step': 19953, 'epoch': 3} {'type': 'loss', 'content': 0.08752064406871796, 'timestamp': '2025-10-01 04:44:56.870086', 'step': 19954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:56.924446', 'step': 19954, 'epoch': 3} {'type': 'loss', 'content': 0.07756460458040237, 'timestamp': '2025-10-01 04:44:56.926497', 'step': 19955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:56.980206', 'step': 19955, 'epoch': 3} {'type': 'loss', 'content': 0.11383221298456192, 'timestamp': '2025-10-01 04:44:56.986720', 'step': 19956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:57.041570', 'step': 19956, 'epoch': 3} {'type': 'loss', 'content': 0.09888607263565063, 'timestamp': '2025-10-01 04:44:57.043689', 'step': 19957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:57.098596', 'step': 19957, 'epoch': 3} {'type': 'loss', 'content': 0.09283412247896194, 'timestamp': '2025-10-01 04:44:57.100813', 'step': 19958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:57.156078', 'step': 19958, 'epoch': 3} {'type': 'loss', 'content': 0.1101272702217102, 'timestamp': '2025-10-01 04:44:57.158340', 'step': 19959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:57.213060', 'step': 19959, 'epoch': 3} {'type': 'loss', 'content': 0.10415014624595642, 'timestamp': '2025-10-01 04:44:57.219363', 'step': 19960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:57.272654', 'step': 19960, 'epoch': 3} {'type': 'loss', 'content': 0.06480295211076736, 'timestamp': '2025-10-01 04:44:57.274931', 'step': 19961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:57.328771', 'step': 19961, 'epoch': 3} {'type': 'loss', 'content': 0.11182574182748795, 'timestamp': '2025-10-01 04:44:57.330866', 'step': 19962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:57.385150', 'step': 19962, 'epoch': 3} {'type': 'loss', 'content': 0.1002114862203598, 'timestamp': '2025-10-01 04:44:57.387201', 'step': 19963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:57.441035', 'step': 19963, 'epoch': 3} {'type': 'loss', 'content': 0.12163907289505005, 'timestamp': '2025-10-01 04:44:57.447258', 'step': 19964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:57.500097', 'step': 19964, 'epoch': 3} {'type': 'loss', 'content': 0.08728955686092377, 'timestamp': '2025-10-01 04:44:57.502223', 'step': 19965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:44:57.556118', 'step': 19965, 'epoch': 3} {'type': 'loss', 'content': 0.09660419821739197, 'timestamp': '2025-10-01 04:44:57.558394', 'step': 19966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:57.612779', 'step': 19966, 'epoch': 3} {'type': 'loss', 'content': 0.13225215673446655, 'timestamp': '2025-10-01 04:44:57.615086', 'step': 19967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:57.669192', 'step': 19967, 'epoch': 3} {'type': 'loss', 'content': 0.06368699669837952, 'timestamp': '2025-10-01 04:44:57.675497', 'step': 19968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:44:57.733927', 'step': 19968, 'epoch': 3} {'type': 'loss', 'content': 0.06869019567966461, 'timestamp': '2025-10-01 04:44:57.736140', 'step': 19969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:57.795784', 'step': 19969, 'epoch': 3} {'type': 'loss', 'content': 0.09886236488819122, 'timestamp': '2025-10-01 04:44:57.798406', 'step': 19970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:57.858530', 'step': 19970, 'epoch': 3} {'type': 'loss', 'content': 0.09436986595392227, 'timestamp': '2025-10-01 04:44:57.860772', 'step': 19971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:57.920717', 'step': 19971, 'epoch': 3} {'type': 'loss', 'content': 0.10322516411542892, 'timestamp': '2025-10-01 04:44:57.927351', 'step': 19972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:57.985405', 'step': 19972, 'epoch': 3} {'type': 'loss', 'content': 0.09781654924154282, 'timestamp': '2025-10-01 04:44:57.987567', 'step': 19973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:58.045470', 'step': 19973, 'epoch': 3} {'type': 'loss', 'content': 0.08483628928661346, 'timestamp': '2025-10-01 04:44:58.047607', 'step': 19974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:58.106379', 'step': 19974, 'epoch': 3} {'type': 'loss', 'content': 0.11832641065120697, 'timestamp': '2025-10-01 04:44:58.108640', 'step': 19975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:58.167646', 'step': 19975, 'epoch': 3} {'type': 'loss', 'content': 0.04495988413691521, 'timestamp': '2025-10-01 04:44:58.174416', 'step': 19976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:44:58.232210', 'step': 19976, 'epoch': 3} {'type': 'loss', 'content': 0.17185671627521515, 'timestamp': '2025-10-01 04:44:58.234504', 'step': 19977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:58.292283', 'step': 19977, 'epoch': 3} {'type': 'loss', 'content': 0.039563145488500595, 'timestamp': '2025-10-01 04:44:58.294426', 'step': 19978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:44:58.352445', 'step': 19978, 'epoch': 3} {'type': 'loss', 'content': 0.06966447830200195, 'timestamp': '2025-10-01 04:44:58.354923', 'step': 19979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:58.409593', 'step': 19979, 'epoch': 3} {'type': 'loss', 'content': 0.055229462683200836, 'timestamp': '2025-10-01 04:44:58.415785', 'step': 19980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:58.468546', 'step': 19980, 'epoch': 3} {'type': 'loss', 'content': 0.06654831022024155, 'timestamp': '2025-10-01 04:44:58.470821', 'step': 19981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:58.524566', 'step': 19981, 'epoch': 3} {'type': 'loss', 'content': 0.051852427423000336, 'timestamp': '2025-10-01 04:44:58.526808', 'step': 19982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:58.580128', 'step': 19982, 'epoch': 3} {'type': 'loss', 'content': 0.041018858551979065, 'timestamp': '2025-10-01 04:44:58.581893', 'step': 19983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:58.634653', 'step': 19983, 'epoch': 3} {'type': 'loss', 'content': 0.1345251351594925, 'timestamp': '2025-10-01 04:44:58.640265', 'step': 19984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:58.692987', 'step': 19984, 'epoch': 3} {'type': 'loss', 'content': 0.10107351839542389, 'timestamp': '2025-10-01 04:44:58.694769', 'step': 19985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:58.747754', 'step': 19985, 'epoch': 3} {'type': 'loss', 'content': 0.15804430842399597, 'timestamp': '2025-10-01 04:44:58.749761', 'step': 19986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:58.802947', 'step': 19986, 'epoch': 3} {'type': 'loss', 'content': 0.0638803169131279, 'timestamp': '2025-10-01 04:44:58.804737', 'step': 19987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:58.858128', 'step': 19987, 'epoch': 3} {'type': 'loss', 'content': 0.025849848985671997, 'timestamp': '2025-10-01 04:44:58.863923', 'step': 19988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:44:58.917002', 'step': 19988, 'epoch': 3} {'type': 'loss', 'content': 0.08794859796762466, 'timestamp': '2025-10-01 04:44:58.919004', 'step': 19989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:58.972698', 'step': 19989, 'epoch': 3} {'type': 'loss', 'content': 0.12343722581863403, 'timestamp': '2025-10-01 04:44:58.974934', 'step': 19990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:59.028339', 'step': 19990, 'epoch': 3} {'type': 'loss', 'content': 0.10065615177154541, 'timestamp': '2025-10-01 04:44:59.030381', 'step': 19991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:59.083910', 'step': 19991, 'epoch': 3} {'type': 'loss', 'content': 0.024576300755143166, 'timestamp': '2025-10-01 04:44:59.089500', 'step': 19992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:59.142575', 'step': 19992, 'epoch': 3} {'type': 'loss', 'content': 0.0553656741976738, 'timestamp': '2025-10-01 04:44:59.145087', 'step': 19993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:44:59.197978', 'step': 19993, 'epoch': 3} {'type': 'loss', 'content': 0.09702084958553314, 'timestamp': '2025-10-01 04:44:59.199972', 'step': 19994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:59.255798', 'step': 19994, 'epoch': 3} {'type': 'loss', 'content': 0.07447237521409988, 'timestamp': '2025-10-01 04:44:59.257885', 'step': 19995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:59.311554', 'step': 19995, 'epoch': 3} {'type': 'loss', 'content': 0.12605755031108856, 'timestamp': '2025-10-01 04:44:59.317806', 'step': 19996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:44:59.370540', 'step': 19996, 'epoch': 3} {'type': 'loss', 'content': 0.056759852916002274, 'timestamp': '2025-10-01 04:44:59.373569', 'step': 19997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:59.428370', 'step': 19997, 'epoch': 3} {'type': 'loss', 'content': 0.07936703413724899, 'timestamp': '2025-10-01 04:44:59.431273', 'step': 19998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:44:59.484595', 'step': 19998, 'epoch': 3} {'type': 'loss', 'content': 0.09326445311307907, 'timestamp': '2025-10-01 04:44:59.486601', 'step': 19999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:44:59.540204', 'step': 19999, 'epoch': 3} {'type': 'loss', 'content': 0.038187894970178604, 'timestamp': '2025-10-01 04:44:59.546502', 'step': 20000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 20000', 'timestamp': '2025-10-01 04:44:59.949986', 'step': 20000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:00.004664', 'step': 20000, 'epoch': 3} {'type': 'loss', 'content': 0.12336637079715729, 'timestamp': '2025-10-01 04:45:00.006691', 'step': 20001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:00.060886', 'step': 20001, 'epoch': 3} {'type': 'loss', 'content': 0.17314156889915466, 'timestamp': '2025-10-01 04:45:00.063004', 'step': 20002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:45:00.117225', 'step': 20002, 'epoch': 3} {'type': 'loss', 'content': 0.08353396505117416, 'timestamp': '2025-10-01 04:45:00.120648', 'step': 20003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:00.174825', 'step': 20003, 'epoch': 3} {'type': 'loss', 'content': 0.041753094643354416, 'timestamp': '2025-10-01 04:45:00.180578', 'step': 20004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:00.233191', 'step': 20004, 'epoch': 3} {'type': 'loss', 'content': 0.0749535784125328, 'timestamp': '2025-10-01 04:45:00.235273', 'step': 20005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:00.288923', 'step': 20005, 'epoch': 3} {'type': 'loss', 'content': 0.04824633151292801, 'timestamp': '2025-10-01 04:45:00.290912', 'step': 20006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:00.345801', 'step': 20006, 'epoch': 3} {'type': 'loss', 'content': 0.18188707530498505, 'timestamp': '2025-10-01 04:45:00.347961', 'step': 20007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:00.401678', 'step': 20007, 'epoch': 3} {'type': 'loss', 'content': 0.06403718888759613, 'timestamp': '2025-10-01 04:45:00.407356', 'step': 20008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:00.468526', 'step': 20008, 'epoch': 3} {'type': 'loss', 'content': 0.04896954074501991, 'timestamp': '2025-10-01 04:45:00.470738', 'step': 20009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:00.530658', 'step': 20009, 'epoch': 3} {'type': 'loss', 'content': 0.13894426822662354, 'timestamp': '2025-10-01 04:45:00.532794', 'step': 20010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:00.586231', 'step': 20010, 'epoch': 3} {'type': 'loss', 'content': 0.15062209963798523, 'timestamp': '2025-10-01 04:45:00.588399', 'step': 20011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:00.641802', 'step': 20011, 'epoch': 3} {'type': 'loss', 'content': 0.13963575661182404, 'timestamp': '2025-10-01 04:45:00.647436', 'step': 20012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:00.700414', 'step': 20012, 'epoch': 3} {'type': 'loss', 'content': 0.03174661844968796, 'timestamp': '2025-10-01 04:45:00.703454', 'step': 20013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:00.760316', 'step': 20013, 'epoch': 3} {'type': 'loss', 'content': 0.09202126413583755, 'timestamp': '2025-10-01 04:45:00.762573', 'step': 20014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:00.815614', 'step': 20014, 'epoch': 3} {'type': 'loss', 'content': 0.16621033847332, 'timestamp': '2025-10-01 04:45:00.817695', 'step': 20015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:00.870856', 'step': 20015, 'epoch': 3} {'type': 'loss', 'content': 0.12974224984645844, 'timestamp': '2025-10-01 04:45:00.876519', 'step': 20016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:00.929838', 'step': 20016, 'epoch': 3} {'type': 'loss', 'content': 0.09179580956697464, 'timestamp': '2025-10-01 04:45:00.932173', 'step': 20017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:00.997259', 'step': 20017, 'epoch': 3} {'type': 'loss', 'content': 0.06095993146300316, 'timestamp': '2025-10-01 04:45:00.999301', 'step': 20018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:01.053606', 'step': 20018, 'epoch': 3} {'type': 'loss', 'content': 0.12067855149507523, 'timestamp': '2025-10-01 04:45:01.056674', 'step': 20019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:45:01.112282', 'step': 20019, 'epoch': 3} {'type': 'loss', 'content': 0.10101164877414703, 'timestamp': '2025-10-01 04:45:01.117894', 'step': 20020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:45:01.170889', 'step': 20020, 'epoch': 3} {'type': 'loss', 'content': 0.03780023008584976, 'timestamp': '2025-10-01 04:45:01.173145', 'step': 20021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:01.227471', 'step': 20021, 'epoch': 3} {'type': 'loss', 'content': 0.12044119089841843, 'timestamp': '2025-10-01 04:45:01.229348', 'step': 20022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:01.284040', 'step': 20022, 'epoch': 3} {'type': 'loss', 'content': 0.18242666125297546, 'timestamp': '2025-10-01 04:45:01.286272', 'step': 20023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:01.340768', 'step': 20023, 'epoch': 3} {'type': 'loss', 'content': 0.12368206679821014, 'timestamp': '2025-10-01 04:45:01.346529', 'step': 20024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:01.401107', 'step': 20024, 'epoch': 3} {'type': 'loss', 'content': 0.0908496081829071, 'timestamp': '2025-10-01 04:45:01.403470', 'step': 20025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:01.457393', 'step': 20025, 'epoch': 3} {'type': 'loss', 'content': 0.040898725390434265, 'timestamp': '2025-10-01 04:45:01.459796', 'step': 20026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:01.512897', 'step': 20026, 'epoch': 3} {'type': 'loss', 'content': 0.07885326445102692, 'timestamp': '2025-10-01 04:45:01.519529', 'step': 20027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:01.573172', 'step': 20027, 'epoch': 3} {'type': 'loss', 'content': 0.058972954750061035, 'timestamp': '2025-10-01 04:45:01.578713', 'step': 20028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:01.632611', 'step': 20028, 'epoch': 3} {'type': 'loss', 'content': 0.1719363033771515, 'timestamp': '2025-10-01 04:45:01.634668', 'step': 20029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:01.688444', 'step': 20029, 'epoch': 3} {'type': 'loss', 'content': 0.18264904618263245, 'timestamp': '2025-10-01 04:45:01.690529', 'step': 20030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:01.744135', 'step': 20030, 'epoch': 3} {'type': 'loss', 'content': 0.10289406776428223, 'timestamp': '2025-10-01 04:45:01.746172', 'step': 20031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:01.800608', 'step': 20031, 'epoch': 3} {'type': 'loss', 'content': 0.08823926001787186, 'timestamp': '2025-10-01 04:45:01.806181', 'step': 20032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:01.859564', 'step': 20032, 'epoch': 3} {'type': 'loss', 'content': 0.10165400803089142, 'timestamp': '2025-10-01 04:45:01.861665', 'step': 20033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:45:01.918745', 'step': 20033, 'epoch': 3} {'type': 'loss', 'content': 0.16436074674129486, 'timestamp': '2025-10-01 04:45:01.920762', 'step': 20034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:45:01.974256', 'step': 20034, 'epoch': 3} {'type': 'loss', 'content': 0.0691981241106987, 'timestamp': '2025-10-01 04:45:01.976247', 'step': 20035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:02.029358', 'step': 20035, 'epoch': 3} {'type': 'loss', 'content': 0.1026836410164833, 'timestamp': '2025-10-01 04:45:02.034944', 'step': 20036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:45:02.088747', 'step': 20036, 'epoch': 3} {'type': 'loss', 'content': 0.09779901057481766, 'timestamp': '2025-10-01 04:45:02.091170', 'step': 20037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:02.149219', 'step': 20037, 'epoch': 3} {'type': 'loss', 'content': 0.1244407445192337, 'timestamp': '2025-10-01 04:45:02.151549', 'step': 20038, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-01 04:45:15.249585', 'step': 20038, 'epoch': 3} {'type': 'pplx', 'content': 12491.030496686495, 'timestamp': '2025-10-01 04:45:15.252616', 'step': 20038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:15.306924', 'step': 20038, 'epoch': 3} {'type': 'loss', 'content': 0.07153511792421341, 'timestamp': '2025-10-01 04:45:15.309427', 'step': 20039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:15.363016', 'step': 20039, 'epoch': 3} {'type': 'loss', 'content': 0.17285917699337006, 'timestamp': '2025-10-01 04:45:15.369150', 'step': 20040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:15.422895', 'step': 20040, 'epoch': 3} {'type': 'loss', 'content': 0.09985394775867462, 'timestamp': '2025-10-01 04:45:15.424929', 'step': 20041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:15.478136', 'step': 20041, 'epoch': 3} {'type': 'loss', 'content': 0.09546118974685669, 'timestamp': '2025-10-01 04:45:15.480212', 'step': 20042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:15.533295', 'step': 20042, 'epoch': 3} {'type': 'loss', 'content': 0.04914310202002525, 'timestamp': '2025-10-01 04:45:15.535423', 'step': 20043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:15.589682', 'step': 20043, 'epoch': 3} {'type': 'loss', 'content': 0.09406603872776031, 'timestamp': '2025-10-01 04:45:15.595911', 'step': 20044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:15.648872', 'step': 20044, 'epoch': 3} {'type': 'loss', 'content': 0.05829715356230736, 'timestamp': '2025-10-01 04:45:15.651967', 'step': 20045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:45:15.706286', 'step': 20045, 'epoch': 3} {'type': 'loss', 'content': 0.09560286998748779, 'timestamp': '2025-10-01 04:45:15.708912', 'step': 20046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:15.762083', 'step': 20046, 'epoch': 3} {'type': 'loss', 'content': 0.07015863060951233, 'timestamp': '2025-10-01 04:45:15.769313', 'step': 20047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:15.823614', 'step': 20047, 'epoch': 3} {'type': 'loss', 'content': 0.12977313995361328, 'timestamp': '2025-10-01 04:45:15.829442', 'step': 20048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:15.882000', 'step': 20048, 'epoch': 3} {'type': 'loss', 'content': 0.08236414194107056, 'timestamp': '2025-10-01 04:45:15.884093', 'step': 20049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:45:15.936839', 'step': 20049, 'epoch': 3} {'type': 'loss', 'content': 0.10194671154022217, 'timestamp': '2025-10-01 04:45:15.938892', 'step': 20050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:15.993769', 'step': 20050, 'epoch': 3} {'type': 'loss', 'content': 0.03983381763100624, 'timestamp': '2025-10-01 04:45:15.995996', 'step': 20051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:16.049487', 'step': 20051, 'epoch': 3} {'type': 'loss', 'content': 0.028369372710585594, 'timestamp': '2025-10-01 04:45:16.056075', 'step': 20052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:16.108967', 'step': 20052, 'epoch': 3} {'type': 'loss', 'content': 0.05262823775410652, 'timestamp': '2025-10-01 04:45:16.111499', 'step': 20053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:16.166623', 'step': 20053, 'epoch': 3} {'type': 'loss', 'content': 0.0679541602730751, 'timestamp': '2025-10-01 04:45:16.169204', 'step': 20054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:16.222926', 'step': 20054, 'epoch': 3} {'type': 'loss', 'content': 0.07605770230293274, 'timestamp': '2025-10-01 04:45:16.225208', 'step': 20055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:16.285847', 'step': 20055, 'epoch': 3} {'type': 'loss', 'content': 0.13140439987182617, 'timestamp': '2025-10-01 04:45:16.291736', 'step': 20056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:16.344779', 'step': 20056, 'epoch': 3} {'type': 'loss', 'content': 0.05613109841942787, 'timestamp': '2025-10-01 04:45:16.347321', 'step': 20057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:16.411915', 'step': 20057, 'epoch': 3} {'type': 'loss', 'content': 0.1245393380522728, 'timestamp': '2025-10-01 04:45:16.414888', 'step': 20058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:16.467861', 'step': 20058, 'epoch': 3} {'type': 'loss', 'content': 0.1769019365310669, 'timestamp': '2025-10-01 04:45:16.469919', 'step': 20059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:45:16.536488', 'step': 20059, 'epoch': 3} {'type': 'loss', 'content': 0.11381716281175613, 'timestamp': '2025-10-01 04:45:16.542353', 'step': 20060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:16.595159', 'step': 20060, 'epoch': 3} {'type': 'loss', 'content': 0.09900286048650742, 'timestamp': '2025-10-01 04:45:16.598557', 'step': 20061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:16.651884', 'step': 20061, 'epoch': 3} {'type': 'loss', 'content': 0.05902738869190216, 'timestamp': '2025-10-01 04:45:16.663839', 'step': 20062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:16.717876', 'step': 20062, 'epoch': 3} {'type': 'loss', 'content': 0.046880125999450684, 'timestamp': '2025-10-01 04:45:16.720094', 'step': 20063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:16.773337', 'step': 20063, 'epoch': 3} {'type': 'loss', 'content': 0.08316553384065628, 'timestamp': '2025-10-01 04:45:16.784956', 'step': 20064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:16.838701', 'step': 20064, 'epoch': 3} {'type': 'loss', 'content': 0.05341140925884247, 'timestamp': '2025-10-01 04:45:16.848413', 'step': 20065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:16.902120', 'step': 20065, 'epoch': 3} {'type': 'loss', 'content': 0.09653061628341675, 'timestamp': '2025-10-01 04:45:16.904240', 'step': 20066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:16.964121', 'step': 20066, 'epoch': 3} {'type': 'loss', 'content': 0.07280829548835754, 'timestamp': '2025-10-01 04:45:16.967172', 'step': 20067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:17.020496', 'step': 20067, 'epoch': 3} {'type': 'loss', 'content': 0.1278703212738037, 'timestamp': '2025-10-01 04:45:17.026254', 'step': 20068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:17.079524', 'step': 20068, 'epoch': 3} {'type': 'loss', 'content': 0.007934221997857094, 'timestamp': '2025-10-01 04:45:17.082315', 'step': 20069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:17.135789', 'step': 20069, 'epoch': 3} {'type': 'loss', 'content': 0.1100260466337204, 'timestamp': '2025-10-01 04:45:17.138800', 'step': 20070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:45:17.192825', 'step': 20070, 'epoch': 3} {'type': 'loss', 'content': 0.06734796613454819, 'timestamp': '2025-10-01 04:45:17.195854', 'step': 20071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:45:17.249156', 'step': 20071, 'epoch': 3} {'type': 'loss', 'content': 0.08421073108911514, 'timestamp': '2025-10-01 04:45:17.254773', 'step': 20072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:45:17.307303', 'step': 20072, 'epoch': 3} {'type': 'loss', 'content': 0.13115257024765015, 'timestamp': '2025-10-01 04:45:17.309456', 'step': 20073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:17.362575', 'step': 20073, 'epoch': 3} {'type': 'loss', 'content': 0.11937060207128525, 'timestamp': '2025-10-01 04:45:17.365156', 'step': 20074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:45:17.419130', 'step': 20074, 'epoch': 3} {'type': 'loss', 'content': 0.09312139451503754, 'timestamp': '2025-10-01 04:45:17.422175', 'step': 20075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:17.475634', 'step': 20075, 'epoch': 3} {'type': 'loss', 'content': 0.07573609799146652, 'timestamp': '2025-10-01 04:45:17.481272', 'step': 20076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:17.533044', 'step': 20076, 'epoch': 3} {'type': 'loss', 'content': 0.07615085691213608, 'timestamp': '2025-10-01 04:45:17.537605', 'step': 20077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:17.589867', 'step': 20077, 'epoch': 3} {'type': 'loss', 'content': 0.04838678613305092, 'timestamp': '2025-10-01 04:45:17.591937', 'step': 20078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:17.645140', 'step': 20078, 'epoch': 3} {'type': 'loss', 'content': 0.06927972286939621, 'timestamp': '2025-10-01 04:45:17.647148', 'step': 20079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:17.700077', 'step': 20079, 'epoch': 3} {'type': 'loss', 'content': 0.12923747301101685, 'timestamp': '2025-10-01 04:45:17.705909', 'step': 20080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:17.758142', 'step': 20080, 'epoch': 3} {'type': 'loss', 'content': 0.04077837988734245, 'timestamp': '2025-10-01 04:45:17.760137', 'step': 20081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:17.812820', 'step': 20081, 'epoch': 3} {'type': 'loss', 'content': 0.08178367465734482, 'timestamp': '2025-10-01 04:45:17.814918', 'step': 20082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:45:17.868050', 'step': 20082, 'epoch': 3} {'type': 'loss', 'content': 0.15444964170455933, 'timestamp': '2025-10-01 04:45:17.870192', 'step': 20083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:17.924039', 'step': 20083, 'epoch': 3} {'type': 'loss', 'content': 0.11204620450735092, 'timestamp': '2025-10-01 04:45:17.929654', 'step': 20084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:45:17.982199', 'step': 20084, 'epoch': 3} {'type': 'loss', 'content': 0.1322731375694275, 'timestamp': '2025-10-01 04:45:17.984291', 'step': 20085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:18.037671', 'step': 20085, 'epoch': 3} {'type': 'loss', 'content': 0.07334499061107635, 'timestamp': '2025-10-01 04:45:18.041200', 'step': 20086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:18.103374', 'step': 20086, 'epoch': 3} {'type': 'loss', 'content': 0.1382385492324829, 'timestamp': '2025-10-01 04:45:18.105568', 'step': 20087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:18.167405', 'step': 20087, 'epoch': 3} {'type': 'loss', 'content': 0.05769208073616028, 'timestamp': '2025-10-01 04:45:18.173247', 'step': 20088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:45:18.225542', 'step': 20088, 'epoch': 3} {'type': 'loss', 'content': 0.15414990484714508, 'timestamp': '2025-10-01 04:45:18.227809', 'step': 20089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:18.281729', 'step': 20089, 'epoch': 3} {'type': 'loss', 'content': 0.106434665620327, 'timestamp': '2025-10-01 04:45:18.283792', 'step': 20090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:18.336779', 'step': 20090, 'epoch': 3} {'type': 'loss', 'content': 0.15590254962444305, 'timestamp': '2025-10-01 04:45:18.339040', 'step': 20091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:18.392400', 'step': 20091, 'epoch': 3} {'type': 'loss', 'content': 0.11173119395971298, 'timestamp': '2025-10-01 04:45:18.398142', 'step': 20092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:18.451012', 'step': 20092, 'epoch': 3} {'type': 'loss', 'content': 0.01691528968513012, 'timestamp': '2025-10-01 04:45:18.453520', 'step': 20093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:18.506633', 'step': 20093, 'epoch': 3} {'type': 'loss', 'content': 0.08245590329170227, 'timestamp': '2025-10-01 04:45:18.508730', 'step': 20094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:45:18.563706', 'step': 20094, 'epoch': 3} {'type': 'loss', 'content': 0.11908204108476639, 'timestamp': '2025-10-01 04:45:18.565870', 'step': 20095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:18.624561', 'step': 20095, 'epoch': 3} {'type': 'loss', 'content': 0.0830610990524292, 'timestamp': '2025-10-01 04:45:18.630218', 'step': 20096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:45:18.683429', 'step': 20096, 'epoch': 3} {'type': 'loss', 'content': 0.05600328743457794, 'timestamp': '2025-10-01 04:45:18.689929', 'step': 20097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:45:18.742793', 'step': 20097, 'epoch': 3} {'type': 'loss', 'content': 0.048501111567020416, 'timestamp': '2025-10-01 04:45:18.744823', 'step': 20098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:18.797225', 'step': 20098, 'epoch': 3} {'type': 'loss', 'content': 0.095659539103508, 'timestamp': '2025-10-01 04:45:18.799386', 'step': 20099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:18.852436', 'step': 20099, 'epoch': 3} {'type': 'loss', 'content': 0.08234352618455887, 'timestamp': '2025-10-01 04:45:18.857990', 'step': 20100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:18.910712', 'step': 20100, 'epoch': 3} {'type': 'loss', 'content': 0.07642821967601776, 'timestamp': '2025-10-01 04:45:18.912912', 'step': 20101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:18.965900', 'step': 20101, 'epoch': 3} {'type': 'loss', 'content': 0.12052033841609955, 'timestamp': '2025-10-01 04:45:18.968061', 'step': 20102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:19.021674', 'step': 20102, 'epoch': 3} {'type': 'loss', 'content': 0.09160321205854416, 'timestamp': '2025-10-01 04:45:19.023968', 'step': 20103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:19.080903', 'step': 20103, 'epoch': 3} {'type': 'loss', 'content': 0.09896044433116913, 'timestamp': '2025-10-01 04:45:19.087051', 'step': 20104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:19.140488', 'step': 20104, 'epoch': 3} {'type': 'loss', 'content': 0.10887555778026581, 'timestamp': '2025-10-01 04:45:19.142754', 'step': 20105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:19.197117', 'step': 20105, 'epoch': 3} {'type': 'loss', 'content': 0.12743210792541504, 'timestamp': '2025-10-01 04:45:19.199417', 'step': 20106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:19.254087', 'step': 20106, 'epoch': 3} {'type': 'loss', 'content': 0.057376522570848465, 'timestamp': '2025-10-01 04:45:19.256716', 'step': 20107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:19.310395', 'step': 20107, 'epoch': 3} {'type': 'loss', 'content': 0.13039006292819977, 'timestamp': '2025-10-01 04:45:19.316480', 'step': 20108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:19.369572', 'step': 20108, 'epoch': 3} {'type': 'loss', 'content': 0.10700523108243942, 'timestamp': '2025-10-01 04:45:19.372006', 'step': 20109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:19.426114', 'step': 20109, 'epoch': 3} {'type': 'loss', 'content': 0.05668710917234421, 'timestamp': '2025-10-01 04:45:19.428429', 'step': 20110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:19.495640', 'step': 20110, 'epoch': 3} {'type': 'loss', 'content': 0.0854969173669815, 'timestamp': '2025-10-01 04:45:19.497962', 'step': 20111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:19.551916', 'step': 20111, 'epoch': 3} {'type': 'loss', 'content': 0.11146318912506104, 'timestamp': '2025-10-01 04:45:19.557549', 'step': 20112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:45:19.611425', 'step': 20112, 'epoch': 3} {'type': 'loss', 'content': 0.06554592400789261, 'timestamp': '2025-10-01 04:45:19.613880', 'step': 20113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:19.668077', 'step': 20113, 'epoch': 3} {'type': 'loss', 'content': 0.06457027047872543, 'timestamp': '2025-10-01 04:45:19.674952', 'step': 20114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:45:19.731201', 'step': 20114, 'epoch': 3} {'type': 'loss', 'content': 0.07091334462165833, 'timestamp': '2025-10-01 04:45:19.733367', 'step': 20115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:19.787711', 'step': 20115, 'epoch': 3} {'type': 'loss', 'content': 0.05694089084863663, 'timestamp': '2025-10-01 04:45:19.793669', 'step': 20116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:45:19.847110', 'step': 20116, 'epoch': 3} {'type': 'loss', 'content': 0.20119062066078186, 'timestamp': '2025-10-01 04:45:19.849594', 'step': 20117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:19.903624', 'step': 20117, 'epoch': 3} {'type': 'loss', 'content': 0.09185261279344559, 'timestamp': '2025-10-01 04:45:19.906030', 'step': 20118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:19.960234', 'step': 20118, 'epoch': 3} {'type': 'loss', 'content': 0.07080972194671631, 'timestamp': '2025-10-01 04:45:19.962664', 'step': 20119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:20.017258', 'step': 20119, 'epoch': 3} {'type': 'loss', 'content': 0.16226254403591156, 'timestamp': '2025-10-01 04:45:20.024714', 'step': 20120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:20.077960', 'step': 20120, 'epoch': 3} {'type': 'loss', 'content': 0.14035342633724213, 'timestamp': '2025-10-01 04:45:20.080413', 'step': 20121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:20.134990', 'step': 20121, 'epoch': 3} {'type': 'loss', 'content': 0.12016549706459045, 'timestamp': '2025-10-01 04:45:20.137442', 'step': 20122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:45:20.192129', 'step': 20122, 'epoch': 3} {'type': 'loss', 'content': 0.09212261438369751, 'timestamp': '2025-10-01 04:45:20.194520', 'step': 20123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:20.248198', 'step': 20123, 'epoch': 3} {'type': 'loss', 'content': 0.06771273910999298, 'timestamp': '2025-10-01 04:45:20.253827', 'step': 20124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:20.307608', 'step': 20124, 'epoch': 3} {'type': 'loss', 'content': 0.04825171083211899, 'timestamp': '2025-10-01 04:45:20.310070', 'step': 20125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:45:20.364020', 'step': 20125, 'epoch': 3} {'type': 'loss', 'content': 0.09310613572597504, 'timestamp': '2025-10-01 04:45:20.366057', 'step': 20126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:20.421728', 'step': 20126, 'epoch': 3} {'type': 'loss', 'content': 0.07987084239721298, 'timestamp': '2025-10-01 04:45:20.429836', 'step': 20127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:45:20.483459', 'step': 20127, 'epoch': 3} {'type': 'loss', 'content': 0.08154571801424026, 'timestamp': '2025-10-01 04:45:20.489035', 'step': 20128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:20.541860', 'step': 20128, 'epoch': 3} {'type': 'loss', 'content': 0.14024773240089417, 'timestamp': '2025-10-01 04:45:20.543952', 'step': 20129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:20.597112', 'step': 20129, 'epoch': 3} {'type': 'loss', 'content': 0.11748697608709335, 'timestamp': '2025-10-01 04:45:20.599216', 'step': 20130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:20.654529', 'step': 20130, 'epoch': 3} {'type': 'loss', 'content': 0.026721689850091934, 'timestamp': '2025-10-01 04:45:20.656606', 'step': 20131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:20.710930', 'step': 20131, 'epoch': 3} {'type': 'loss', 'content': 0.04608174040913582, 'timestamp': '2025-10-01 04:45:20.716864', 'step': 20132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:20.769316', 'step': 20132, 'epoch': 3} {'type': 'loss', 'content': 0.07454966753721237, 'timestamp': '2025-10-01 04:45:20.771462', 'step': 20133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:20.824656', 'step': 20133, 'epoch': 3} {'type': 'loss', 'content': 0.16941513121128082, 'timestamp': '2025-10-01 04:45:20.826851', 'step': 20134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:20.880322', 'step': 20134, 'epoch': 3} {'type': 'loss', 'content': 0.08206915855407715, 'timestamp': '2025-10-01 04:45:20.882549', 'step': 20135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:20.935441', 'step': 20135, 'epoch': 3} {'type': 'loss', 'content': 0.11023017019033432, 'timestamp': '2025-10-01 04:45:20.941049', 'step': 20136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:20.994118', 'step': 20136, 'epoch': 3} {'type': 'loss', 'content': 0.06225110590457916, 'timestamp': '2025-10-01 04:45:20.996965', 'step': 20137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:21.049880', 'step': 20137, 'epoch': 3} {'type': 'loss', 'content': 0.08759116381406784, 'timestamp': '2025-10-01 04:45:21.052063', 'step': 20138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:21.105283', 'step': 20138, 'epoch': 3} {'type': 'loss', 'content': 0.09351171553134918, 'timestamp': '2025-10-01 04:45:21.107360', 'step': 20139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:21.163394', 'step': 20139, 'epoch': 3} {'type': 'loss', 'content': 0.10944854468107224, 'timestamp': '2025-10-01 04:45:21.169073', 'step': 20140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:45:21.221911', 'step': 20140, 'epoch': 3} {'type': 'loss', 'content': 0.23208075761795044, 'timestamp': '2025-10-01 04:45:21.231186', 'step': 20141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:21.284049', 'step': 20141, 'epoch': 3} {'type': 'loss', 'content': 0.15094026923179626, 'timestamp': '2025-10-01 04:45:21.298425', 'step': 20142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:21.353486', 'step': 20142, 'epoch': 3} {'type': 'loss', 'content': 0.030704809352755547, 'timestamp': '2025-10-01 04:45:21.356163', 'step': 20143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:21.409501', 'step': 20143, 'epoch': 3} {'type': 'loss', 'content': 0.06980136036872864, 'timestamp': '2025-10-01 04:45:21.415118', 'step': 20144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:21.467751', 'step': 20144, 'epoch': 3} {'type': 'loss', 'content': 0.046820078045129776, 'timestamp': '2025-10-01 04:45:21.469782', 'step': 20145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:21.529653', 'step': 20145, 'epoch': 3} {'type': 'loss', 'content': 0.04232136160135269, 'timestamp': '2025-10-01 04:45:21.531745', 'step': 20146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:21.584489', 'step': 20146, 'epoch': 3} {'type': 'loss', 'content': 0.11167625337839127, 'timestamp': '2025-10-01 04:45:21.586650', 'step': 20147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:21.640110', 'step': 20147, 'epoch': 3} {'type': 'loss', 'content': 0.07618039846420288, 'timestamp': '2025-10-01 04:45:21.646023', 'step': 20148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:21.704906', 'step': 20148, 'epoch': 3} {'type': 'loss', 'content': 0.13104218244552612, 'timestamp': '2025-10-01 04:45:21.706936', 'step': 20149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:21.760932', 'step': 20149, 'epoch': 3} {'type': 'loss', 'content': 0.10375164449214935, 'timestamp': '2025-10-01 04:45:21.763471', 'step': 20150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:21.816711', 'step': 20150, 'epoch': 3} {'type': 'loss', 'content': 0.09667010605335236, 'timestamp': '2025-10-01 04:45:21.818787', 'step': 20151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:45:21.872140', 'step': 20151, 'epoch': 3} {'type': 'loss', 'content': 0.0797400176525116, 'timestamp': '2025-10-01 04:45:21.877697', 'step': 20152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:21.930644', 'step': 20152, 'epoch': 3} {'type': 'loss', 'content': 0.03208358213305473, 'timestamp': '2025-10-01 04:45:21.932790', 'step': 20153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:21.993271', 'step': 20153, 'epoch': 3} {'type': 'loss', 'content': 0.15519212186336517, 'timestamp': '2025-10-01 04:45:21.995351', 'step': 20154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:22.049863', 'step': 20154, 'epoch': 3} {'type': 'loss', 'content': 0.03822147101163864, 'timestamp': '2025-10-01 04:45:22.052737', 'step': 20155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:22.125431', 'step': 20155, 'epoch': 3} {'type': 'loss', 'content': 0.06686098128557205, 'timestamp': '2025-10-01 04:45:22.131120', 'step': 20156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:22.184369', 'step': 20156, 'epoch': 3} {'type': 'loss', 'content': 0.0525326132774353, 'timestamp': '2025-10-01 04:45:22.186380', 'step': 20157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:45:22.239392', 'step': 20157, 'epoch': 3} {'type': 'loss', 'content': 0.12813934683799744, 'timestamp': '2025-10-01 04:45:22.241454', 'step': 20158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:22.294713', 'step': 20158, 'epoch': 3} {'type': 'loss', 'content': 0.09984364360570908, 'timestamp': '2025-10-01 04:45:22.296879', 'step': 20159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:22.349946', 'step': 20159, 'epoch': 3} {'type': 'loss', 'content': 0.14666584134101868, 'timestamp': '2025-10-01 04:45:22.355638', 'step': 20160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:22.408674', 'step': 20160, 'epoch': 3} {'type': 'loss', 'content': 0.09254623204469681, 'timestamp': '2025-10-01 04:45:22.411071', 'step': 20161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:22.464880', 'step': 20161, 'epoch': 3} {'type': 'loss', 'content': 0.0823347344994545, 'timestamp': '2025-10-01 04:45:22.467299', 'step': 20162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:22.520863', 'step': 20162, 'epoch': 3} {'type': 'loss', 'content': 0.05317673459649086, 'timestamp': '2025-10-01 04:45:22.522953', 'step': 20163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:45:22.575559', 'step': 20163, 'epoch': 3} {'type': 'loss', 'content': 0.14947456121444702, 'timestamp': '2025-10-01 04:45:22.581218', 'step': 20164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:22.633796', 'step': 20164, 'epoch': 3} {'type': 'loss', 'content': 0.042340248823165894, 'timestamp': '2025-10-01 04:45:22.636020', 'step': 20165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:22.689181', 'step': 20165, 'epoch': 3} {'type': 'loss', 'content': 0.07335388660430908, 'timestamp': '2025-10-01 04:45:22.691365', 'step': 20166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:22.744566', 'step': 20166, 'epoch': 3} {'type': 'loss', 'content': 0.0773145779967308, 'timestamp': '2025-10-01 04:45:22.746589', 'step': 20167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:45:22.799441', 'step': 20167, 'epoch': 3} {'type': 'loss', 'content': 0.08574356138706207, 'timestamp': '2025-10-01 04:45:22.805041', 'step': 20168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:22.857810', 'step': 20168, 'epoch': 3} {'type': 'loss', 'content': 0.04237036779522896, 'timestamp': '2025-10-01 04:45:22.859847', 'step': 20169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:22.913261', 'step': 20169, 'epoch': 3} {'type': 'loss', 'content': 0.11460848152637482, 'timestamp': '2025-10-01 04:45:22.915320', 'step': 20170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:22.968584', 'step': 20170, 'epoch': 3} {'type': 'loss', 'content': 0.06532134860754013, 'timestamp': '2025-10-01 04:45:22.970625', 'step': 20171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:23.024124', 'step': 20171, 'epoch': 3} {'type': 'loss', 'content': 0.09864675253629684, 'timestamp': '2025-10-01 04:45:23.029785', 'step': 20172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:23.083346', 'step': 20172, 'epoch': 3} {'type': 'loss', 'content': 0.11755578964948654, 'timestamp': '2025-10-01 04:45:23.085521', 'step': 20173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:23.138255', 'step': 20173, 'epoch': 3} {'type': 'loss', 'content': 0.11524171382188797, 'timestamp': '2025-10-01 04:45:23.140371', 'step': 20174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:23.193244', 'step': 20174, 'epoch': 3} {'type': 'loss', 'content': 0.1178816556930542, 'timestamp': '2025-10-01 04:45:23.195425', 'step': 20175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:23.249286', 'step': 20175, 'epoch': 3} {'type': 'loss', 'content': 0.09377230703830719, 'timestamp': '2025-10-01 04:45:23.255010', 'step': 20176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:45:23.307817', 'step': 20176, 'epoch': 3} {'type': 'loss', 'content': 0.07421568036079407, 'timestamp': '2025-10-01 04:45:23.309883', 'step': 20177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:23.362842', 'step': 20177, 'epoch': 3} {'type': 'loss', 'content': 0.06769168376922607, 'timestamp': '2025-10-01 04:45:23.364901', 'step': 20178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:23.418541', 'step': 20178, 'epoch': 3} {'type': 'loss', 'content': 0.03607708588242531, 'timestamp': '2025-10-01 04:45:23.420545', 'step': 20179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:23.474116', 'step': 20179, 'epoch': 3} {'type': 'loss', 'content': 0.03743986785411835, 'timestamp': '2025-10-01 04:45:23.479784', 'step': 20180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:45:23.532358', 'step': 20180, 'epoch': 3} {'type': 'loss', 'content': 0.048568855971097946, 'timestamp': '2025-10-01 04:45:23.534456', 'step': 20181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:23.587614', 'step': 20181, 'epoch': 3} {'type': 'loss', 'content': 0.05548916384577751, 'timestamp': '2025-10-01 04:45:23.589781', 'step': 20182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:23.642531', 'step': 20182, 'epoch': 3} {'type': 'loss', 'content': 0.04998641461133957, 'timestamp': '2025-10-01 04:45:23.644556', 'step': 20183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:23.697437', 'step': 20183, 'epoch': 3} {'type': 'loss', 'content': 0.01619771309196949, 'timestamp': '2025-10-01 04:45:23.703069', 'step': 20184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:23.755875', 'step': 20184, 'epoch': 3} {'type': 'loss', 'content': 0.1474636048078537, 'timestamp': '2025-10-01 04:45:23.757879', 'step': 20185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:23.811050', 'step': 20185, 'epoch': 3} {'type': 'loss', 'content': 0.06895430386066437, 'timestamp': '2025-10-01 04:45:23.813228', 'step': 20186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:45:23.868792', 'step': 20186, 'epoch': 3} {'type': 'loss', 'content': 0.09644875675439835, 'timestamp': '2025-10-01 04:45:23.870979', 'step': 20187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:23.924616', 'step': 20187, 'epoch': 3} {'type': 'loss', 'content': 0.032203882932662964, 'timestamp': '2025-10-01 04:45:23.930432', 'step': 20188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:45:23.982893', 'step': 20188, 'epoch': 3} {'type': 'loss', 'content': 0.09772000461816788, 'timestamp': '2025-10-01 04:45:23.984989', 'step': 20189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:24.037788', 'step': 20189, 'epoch': 3} {'type': 'loss', 'content': 0.08950643241405487, 'timestamp': '2025-10-01 04:45:24.040039', 'step': 20190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:24.093416', 'step': 20190, 'epoch': 3} {'type': 'loss', 'content': 0.1072288528084755, 'timestamp': '2025-10-01 04:45:24.095788', 'step': 20191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:24.162538', 'step': 20191, 'epoch': 3} {'type': 'loss', 'content': 0.03615347295999527, 'timestamp': '2025-10-01 04:45:24.169965', 'step': 20192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:45:24.223299', 'step': 20192, 'epoch': 3} {'type': 'loss', 'content': 0.1570345163345337, 'timestamp': '2025-10-01 04:45:24.225350', 'step': 20193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:24.278134', 'step': 20193, 'epoch': 3} {'type': 'loss', 'content': 0.17133137583732605, 'timestamp': '2025-10-01 04:45:24.280159', 'step': 20194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:45:24.333501', 'step': 20194, 'epoch': 3} {'type': 'loss', 'content': 0.11885940283536911, 'timestamp': '2025-10-01 04:45:24.335539', 'step': 20195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:24.388394', 'step': 20195, 'epoch': 3} {'type': 'loss', 'content': 0.07031264901161194, 'timestamp': '2025-10-01 04:45:24.394071', 'step': 20196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:24.446223', 'step': 20196, 'epoch': 3} {'type': 'loss', 'content': 0.03787106275558472, 'timestamp': '2025-10-01 04:45:24.448379', 'step': 20197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:24.501455', 'step': 20197, 'epoch': 3} {'type': 'loss', 'content': 0.11063966900110245, 'timestamp': '2025-10-01 04:45:24.504346', 'step': 20198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:24.558441', 'step': 20198, 'epoch': 3} {'type': 'loss', 'content': 0.18069656193256378, 'timestamp': '2025-10-01 04:45:24.560472', 'step': 20199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:24.613901', 'step': 20199, 'epoch': 3} {'type': 'loss', 'content': 0.06849053502082825, 'timestamp': '2025-10-01 04:45:24.619546', 'step': 20200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:24.672492', 'step': 20200, 'epoch': 3} {'type': 'loss', 'content': 0.14279291033744812, 'timestamp': '2025-10-01 04:45:24.674952', 'step': 20201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:45:24.728625', 'step': 20201, 'epoch': 3} {'type': 'loss', 'content': 0.15955165028572083, 'timestamp': '2025-10-01 04:45:24.731032', 'step': 20202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:45:24.784503', 'step': 20202, 'epoch': 3} {'type': 'loss', 'content': 0.08565240353345871, 'timestamp': '2025-10-01 04:45:24.786565', 'step': 20203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:24.840912', 'step': 20203, 'epoch': 3} {'type': 'loss', 'content': 0.0659598857164383, 'timestamp': '2025-10-01 04:45:24.846694', 'step': 20204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:24.899763', 'step': 20204, 'epoch': 3} {'type': 'loss', 'content': 0.14537177979946136, 'timestamp': '2025-10-01 04:45:24.902057', 'step': 20205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:45:24.955564', 'step': 20205, 'epoch': 3} {'type': 'loss', 'content': 0.18262898921966553, 'timestamp': '2025-10-01 04:45:24.957665', 'step': 20206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:45:25.010819', 'step': 20206, 'epoch': 3} {'type': 'loss', 'content': 0.05925809219479561, 'timestamp': '2025-10-01 04:45:25.012787', 'step': 20207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:25.066213', 'step': 20207, 'epoch': 3} {'type': 'loss', 'content': 0.0711439922451973, 'timestamp': '2025-10-01 04:45:25.072033', 'step': 20208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:25.124697', 'step': 20208, 'epoch': 3} {'type': 'loss', 'content': 0.09899178892374039, 'timestamp': '2025-10-01 04:45:25.126816', 'step': 20209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:25.180113', 'step': 20209, 'epoch': 3} {'type': 'loss', 'content': 0.09639021009206772, 'timestamp': '2025-10-01 04:45:25.182187', 'step': 20210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:45:25.235140', 'step': 20210, 'epoch': 3} {'type': 'loss', 'content': 0.12543345987796783, 'timestamp': '2025-10-01 04:45:25.237162', 'step': 20211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:25.290252', 'step': 20211, 'epoch': 3} {'type': 'loss', 'content': 0.11375055462121964, 'timestamp': '2025-10-01 04:45:25.302536', 'step': 20212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:25.355110', 'step': 20212, 'epoch': 3} {'type': 'loss', 'content': 0.16514045000076294, 'timestamp': '2025-10-01 04:45:25.357129', 'step': 20213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:25.410434', 'step': 20213, 'epoch': 3} {'type': 'loss', 'content': 0.03892951086163521, 'timestamp': '2025-10-01 04:45:25.412470', 'step': 20214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:25.466105', 'step': 20214, 'epoch': 3} {'type': 'loss', 'content': 0.047185685485601425, 'timestamp': '2025-10-01 04:45:25.468191', 'step': 20215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:45:25.521293', 'step': 20215, 'epoch': 3} {'type': 'loss', 'content': 0.061848003417253494, 'timestamp': '2025-10-01 04:45:25.526985', 'step': 20216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:25.579760', 'step': 20216, 'epoch': 3} {'type': 'loss', 'content': 0.10118233412504196, 'timestamp': '2025-10-01 04:45:25.581961', 'step': 20217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:25.635356', 'step': 20217, 'epoch': 3} {'type': 'loss', 'content': 0.09414743632078171, 'timestamp': '2025-10-01 04:45:25.637538', 'step': 20218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:25.691405', 'step': 20218, 'epoch': 3} {'type': 'loss', 'content': 0.12962642312049866, 'timestamp': '2025-10-01 04:45:25.693664', 'step': 20219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:25.747214', 'step': 20219, 'epoch': 3} {'type': 'loss', 'content': 0.0932142436504364, 'timestamp': '2025-10-01 04:45:25.752813', 'step': 20220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:25.805844', 'step': 20220, 'epoch': 3} {'type': 'loss', 'content': 0.08516576141119003, 'timestamp': '2025-10-01 04:45:25.808007', 'step': 20221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:45:25.861333', 'step': 20221, 'epoch': 3} {'type': 'loss', 'content': 0.0652366429567337, 'timestamp': '2025-10-01 04:45:25.863387', 'step': 20222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:25.916981', 'step': 20222, 'epoch': 3} {'type': 'loss', 'content': 0.14998772740364075, 'timestamp': '2025-10-01 04:45:25.919033', 'step': 20223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:25.972339', 'step': 20223, 'epoch': 3} {'type': 'loss', 'content': 0.03845493495464325, 'timestamp': '2025-10-01 04:45:25.978029', 'step': 20224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:26.030786', 'step': 20224, 'epoch': 3} {'type': 'loss', 'content': 0.0397304892539978, 'timestamp': '2025-10-01 04:45:26.032951', 'step': 20225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:26.086354', 'step': 20225, 'epoch': 3} {'type': 'loss', 'content': 0.09062561392784119, 'timestamp': '2025-10-01 04:45:26.088487', 'step': 20226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:26.142265', 'step': 20226, 'epoch': 3} {'type': 'loss', 'content': 0.07473258674144745, 'timestamp': '2025-10-01 04:45:26.144306', 'step': 20227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:26.198384', 'step': 20227, 'epoch': 3} {'type': 'loss', 'content': 0.08630967140197754, 'timestamp': '2025-10-01 04:45:26.203978', 'step': 20228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:26.257540', 'step': 20228, 'epoch': 3} {'type': 'loss', 'content': 0.08894670754671097, 'timestamp': '2025-10-01 04:45:26.259658', 'step': 20229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:45:26.313473', 'step': 20229, 'epoch': 3} {'type': 'loss', 'content': 0.09389141947031021, 'timestamp': '2025-10-01 04:45:26.315487', 'step': 20230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:26.369096', 'step': 20230, 'epoch': 3} {'type': 'loss', 'content': 0.030741345137357712, 'timestamp': '2025-10-01 04:45:26.371170', 'step': 20231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:26.424600', 'step': 20231, 'epoch': 3} {'type': 'loss', 'content': 0.08583159744739532, 'timestamp': '2025-10-01 04:45:26.431120', 'step': 20232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:26.485950', 'step': 20232, 'epoch': 3} {'type': 'loss', 'content': 0.16349218785762787, 'timestamp': '2025-10-01 04:45:26.488140', 'step': 20233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:26.545286', 'step': 20233, 'epoch': 3} {'type': 'loss', 'content': 0.20225445926189423, 'timestamp': '2025-10-01 04:45:26.547545', 'step': 20234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:26.602182', 'step': 20234, 'epoch': 3} {'type': 'loss', 'content': 0.0922001525759697, 'timestamp': '2025-10-01 04:45:26.604427', 'step': 20235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:26.658455', 'step': 20235, 'epoch': 3} {'type': 'loss', 'content': 0.16604606807231903, 'timestamp': '2025-10-01 04:45:26.664683', 'step': 20236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:45:26.718009', 'step': 20236, 'epoch': 3} {'type': 'loss', 'content': 0.054765231907367706, 'timestamp': '2025-10-01 04:45:26.720072', 'step': 20237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:26.773882', 'step': 20237, 'epoch': 3} {'type': 'loss', 'content': 0.06676661223173141, 'timestamp': '2025-10-01 04:45:26.775917', 'step': 20238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:26.830117', 'step': 20238, 'epoch': 3} {'type': 'loss', 'content': 0.07046099752187729, 'timestamp': '2025-10-01 04:45:26.832431', 'step': 20239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:26.889160', 'step': 20239, 'epoch': 3} {'type': 'loss', 'content': 0.1189269945025444, 'timestamp': '2025-10-01 04:45:26.895366', 'step': 20240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:26.949560', 'step': 20240, 'epoch': 3} {'type': 'loss', 'content': 0.07704699039459229, 'timestamp': '2025-10-01 04:45:26.951536', 'step': 20241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:45:27.004676', 'step': 20241, 'epoch': 3} {'type': 'loss', 'content': 0.055417731404304504, 'timestamp': '2025-10-01 04:45:27.006689', 'step': 20242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:27.059846', 'step': 20242, 'epoch': 3} {'type': 'loss', 'content': 0.07678702473640442, 'timestamp': '2025-10-01 04:45:27.061866', 'step': 20243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:27.115178', 'step': 20243, 'epoch': 3} {'type': 'loss', 'content': 0.17968472838401794, 'timestamp': '2025-10-01 04:45:27.120925', 'step': 20244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:27.173550', 'step': 20244, 'epoch': 3} {'type': 'loss', 'content': 0.10646144300699234, 'timestamp': '2025-10-01 04:45:27.175554', 'step': 20245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:27.228740', 'step': 20245, 'epoch': 3} {'type': 'loss', 'content': 0.038233477622270584, 'timestamp': '2025-10-01 04:45:27.231310', 'step': 20246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:27.284930', 'step': 20246, 'epoch': 3} {'type': 'loss', 'content': 0.05206344276666641, 'timestamp': '2025-10-01 04:45:27.287142', 'step': 20247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:27.341018', 'step': 20247, 'epoch': 3} {'type': 'loss', 'content': 0.06504305452108383, 'timestamp': '2025-10-01 04:45:27.346876', 'step': 20248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:27.399508', 'step': 20248, 'epoch': 3} {'type': 'loss', 'content': 0.09575667977333069, 'timestamp': '2025-10-01 04:45:27.401724', 'step': 20249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:45:27.454801', 'step': 20249, 'epoch': 3} {'type': 'loss', 'content': 0.05792868137359619, 'timestamp': '2025-10-01 04:45:27.456825', 'step': 20250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:27.510857', 'step': 20250, 'epoch': 3} {'type': 'loss', 'content': 0.09077568352222443, 'timestamp': '2025-10-01 04:45:27.517644', 'step': 20251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:27.570676', 'step': 20251, 'epoch': 3} {'type': 'loss', 'content': 0.10377946496009827, 'timestamp': '2025-10-01 04:45:27.576407', 'step': 20252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:27.630013', 'step': 20252, 'epoch': 3} {'type': 'loss', 'content': 0.10097883641719818, 'timestamp': '2025-10-01 04:45:27.632529', 'step': 20253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:27.691190', 'step': 20253, 'epoch': 3} {'type': 'loss', 'content': 0.006155192852020264, 'timestamp': '2025-10-01 04:45:27.693250', 'step': 20254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:27.748709', 'step': 20254, 'epoch': 3} {'type': 'loss', 'content': 0.1885305494070053, 'timestamp': '2025-10-01 04:45:27.750779', 'step': 20255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:27.804181', 'step': 20255, 'epoch': 3} {'type': 'loss', 'content': 0.1055585965514183, 'timestamp': '2025-10-01 04:45:27.809973', 'step': 20256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:27.862702', 'step': 20256, 'epoch': 3} {'type': 'loss', 'content': 0.06566289812326431, 'timestamp': '2025-10-01 04:45:27.864788', 'step': 20257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:27.920549', 'step': 20257, 'epoch': 3} {'type': 'loss', 'content': 0.08421861380338669, 'timestamp': '2025-10-01 04:45:27.922669', 'step': 20258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:45:27.981912', 'step': 20258, 'epoch': 3} {'type': 'loss', 'content': 0.10272805392742157, 'timestamp': '2025-10-01 04:45:27.984106', 'step': 20259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:28.043150', 'step': 20259, 'epoch': 3} {'type': 'loss', 'content': 0.07503198087215424, 'timestamp': '2025-10-01 04:45:28.050175', 'step': 20260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:28.108954', 'step': 20260, 'epoch': 3} {'type': 'loss', 'content': 0.037663787603378296, 'timestamp': '2025-10-01 04:45:28.111614', 'step': 20261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:28.171925', 'step': 20261, 'epoch': 3} {'type': 'loss', 'content': 0.0331689678132534, 'timestamp': '2025-10-01 04:45:28.181616', 'step': 20262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:28.241502', 'step': 20262, 'epoch': 3} {'type': 'loss', 'content': 0.09372080862522125, 'timestamp': '2025-10-01 04:45:28.244110', 'step': 20263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:28.304270', 'step': 20263, 'epoch': 3} {'type': 'loss', 'content': 0.10233211517333984, 'timestamp': '2025-10-01 04:45:28.311165', 'step': 20264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:28.379422', 'step': 20264, 'epoch': 3} {'type': 'loss', 'content': 0.11897671222686768, 'timestamp': '2025-10-01 04:45:28.381827', 'step': 20265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:28.440414', 'step': 20265, 'epoch': 3} {'type': 'loss', 'content': 0.09929133206605911, 'timestamp': '2025-10-01 04:45:28.442801', 'step': 20266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:28.500845', 'step': 20266, 'epoch': 3} {'type': 'loss', 'content': 0.11893113702535629, 'timestamp': '2025-10-01 04:45:28.503225', 'step': 20267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:28.561852', 'step': 20267, 'epoch': 3} {'type': 'loss', 'content': 0.1115938350558281, 'timestamp': '2025-10-01 04:45:28.568835', 'step': 20268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:28.624862', 'step': 20268, 'epoch': 3} {'type': 'loss', 'content': 0.1023441031575203, 'timestamp': '2025-10-01 04:45:28.627146', 'step': 20269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:45:28.682077', 'step': 20269, 'epoch': 3} {'type': 'loss', 'content': 0.15585124492645264, 'timestamp': '2025-10-01 04:45:28.684642', 'step': 20270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:28.738661', 'step': 20270, 'epoch': 3} {'type': 'loss', 'content': 0.10292335599660873, 'timestamp': '2025-10-01 04:45:28.740953', 'step': 20271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:28.794951', 'step': 20271, 'epoch': 3} {'type': 'loss', 'content': 0.12264212220907211, 'timestamp': '2025-10-01 04:45:28.801244', 'step': 20272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:28.861935', 'step': 20272, 'epoch': 3} {'type': 'loss', 'content': 0.0348832942545414, 'timestamp': '2025-10-01 04:45:28.864003', 'step': 20273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:45:28.917832', 'step': 20273, 'epoch': 3} {'type': 'loss', 'content': 0.06890717148780823, 'timestamp': '2025-10-01 04:45:28.920144', 'step': 20274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:28.973846', 'step': 20274, 'epoch': 3} {'type': 'loss', 'content': 0.10172475874423981, 'timestamp': '2025-10-01 04:45:28.976165', 'step': 20275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:29.030174', 'step': 20275, 'epoch': 3} {'type': 'loss', 'content': 0.10681697726249695, 'timestamp': '2025-10-01 04:45:29.036355', 'step': 20276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:29.098939', 'step': 20276, 'epoch': 3} {'type': 'loss', 'content': 0.0996318832039833, 'timestamp': '2025-10-01 04:45:29.101514', 'step': 20277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:45:29.155744', 'step': 20277, 'epoch': 3} {'type': 'loss', 'content': 0.09558054059743881, 'timestamp': '2025-10-01 04:45:29.158013', 'step': 20278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:29.212806', 'step': 20278, 'epoch': 3} {'type': 'loss', 'content': 0.04656726121902466, 'timestamp': '2025-10-01 04:45:29.215321', 'step': 20279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:29.269218', 'step': 20279, 'epoch': 3} {'type': 'loss', 'content': 0.11316733807325363, 'timestamp': '2025-10-01 04:45:29.274870', 'step': 20280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:29.328301', 'step': 20280, 'epoch': 3} {'type': 'loss', 'content': 0.07139311730861664, 'timestamp': '2025-10-01 04:45:29.330717', 'step': 20281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:29.384930', 'step': 20281, 'epoch': 3} {'type': 'loss', 'content': 0.07977885752916336, 'timestamp': '2025-10-01 04:45:29.387498', 'step': 20282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:29.441941', 'step': 20282, 'epoch': 3} {'type': 'loss', 'content': 0.11996375769376755, 'timestamp': '2025-10-01 04:45:29.444410', 'step': 20283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:29.498733', 'step': 20283, 'epoch': 3} {'type': 'loss', 'content': 0.07934386283159256, 'timestamp': '2025-10-01 04:45:29.504998', 'step': 20284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:45:29.564170', 'step': 20284, 'epoch': 3} {'type': 'loss', 'content': 0.08263841271400452, 'timestamp': '2025-10-01 04:45:29.566576', 'step': 20285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:29.619894', 'step': 20285, 'epoch': 3} {'type': 'loss', 'content': 0.09337764233350754, 'timestamp': '2025-10-01 04:45:29.622294', 'step': 20286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:29.676622', 'step': 20286, 'epoch': 3} {'type': 'loss', 'content': 0.05551472678780556, 'timestamp': '2025-10-01 04:45:29.678825', 'step': 20287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:29.731729', 'step': 20287, 'epoch': 3} {'type': 'loss', 'content': 0.05977877229452133, 'timestamp': '2025-10-01 04:45:29.737631', 'step': 20288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:29.790445', 'step': 20288, 'epoch': 3} {'type': 'loss', 'content': 0.0949336513876915, 'timestamp': '2025-10-01 04:45:29.792600', 'step': 20289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:29.845386', 'step': 20289, 'epoch': 3} {'type': 'loss', 'content': 0.09009038656949997, 'timestamp': '2025-10-01 04:45:29.848157', 'step': 20290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:45:29.902636', 'step': 20290, 'epoch': 3} {'type': 'loss', 'content': 0.06206484138965607, 'timestamp': '2025-10-01 04:45:29.905020', 'step': 20291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:29.957703', 'step': 20291, 'epoch': 3} {'type': 'loss', 'content': 0.12042612582445145, 'timestamp': '2025-10-01 04:45:29.963375', 'step': 20292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:45:30.015878', 'step': 20292, 'epoch': 3} {'type': 'loss', 'content': 0.0683230608701706, 'timestamp': '2025-10-01 04:45:30.017926', 'step': 20293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:30.071566', 'step': 20293, 'epoch': 3} {'type': 'loss', 'content': 0.07279554754495621, 'timestamp': '2025-10-01 04:45:30.073689', 'step': 20294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:30.126658', 'step': 20294, 'epoch': 3} {'type': 'loss', 'content': 0.02798379585146904, 'timestamp': '2025-10-01 04:45:30.128708', 'step': 20295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:30.181919', 'step': 20295, 'epoch': 3} {'type': 'loss', 'content': 0.024819843471050262, 'timestamp': '2025-10-01 04:45:30.187560', 'step': 20296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:30.240410', 'step': 20296, 'epoch': 3} {'type': 'loss', 'content': 0.06350813060998917, 'timestamp': '2025-10-01 04:45:30.242658', 'step': 20297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:30.295244', 'step': 20297, 'epoch': 3} {'type': 'loss', 'content': 0.05493609234690666, 'timestamp': '2025-10-01 04:45:30.297401', 'step': 20298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:30.351064', 'step': 20298, 'epoch': 3} {'type': 'loss', 'content': 0.12030007690191269, 'timestamp': '2025-10-01 04:45:30.353570', 'step': 20299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:30.407558', 'step': 20299, 'epoch': 3} {'type': 'loss', 'content': 0.04748377203941345, 'timestamp': '2025-10-01 04:45:30.413549', 'step': 20300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:45:30.466890', 'step': 20300, 'epoch': 3} {'type': 'loss', 'content': 0.06370513886213303, 'timestamp': '2025-10-01 04:45:30.468927', 'step': 20301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:30.522002', 'step': 20301, 'epoch': 3} {'type': 'loss', 'content': 0.10095124691724777, 'timestamp': '2025-10-01 04:45:30.524399', 'step': 20302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:30.577681', 'step': 20302, 'epoch': 3} {'type': 'loss', 'content': 0.04104860499501228, 'timestamp': '2025-10-01 04:45:30.579813', 'step': 20303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:30.633134', 'step': 20303, 'epoch': 3} {'type': 'loss', 'content': 0.07305173575878143, 'timestamp': '2025-10-01 04:45:30.639002', 'step': 20304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:30.692209', 'step': 20304, 'epoch': 3} {'type': 'loss', 'content': 0.0595715269446373, 'timestamp': '2025-10-01 04:45:30.694333', 'step': 20305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:30.748722', 'step': 20305, 'epoch': 3} {'type': 'loss', 'content': 0.013988832011818886, 'timestamp': '2025-10-01 04:45:30.750765', 'step': 20306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:30.803970', 'step': 20306, 'epoch': 3} {'type': 'loss', 'content': 0.08289080113172531, 'timestamp': '2025-10-01 04:45:30.806045', 'step': 20307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:45:30.870287', 'step': 20307, 'epoch': 3} {'type': 'loss', 'content': 0.10983850061893463, 'timestamp': '2025-10-01 04:45:30.875831', 'step': 20308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:30.928139', 'step': 20308, 'epoch': 3} {'type': 'loss', 'content': 0.07964035123586655, 'timestamp': '2025-10-01 04:45:30.930177', 'step': 20309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:30.983055', 'step': 20309, 'epoch': 3} {'type': 'loss', 'content': 0.13436517119407654, 'timestamp': '2025-10-01 04:45:30.985086', 'step': 20310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:31.037983', 'step': 20310, 'epoch': 3} {'type': 'loss', 'content': 0.06261792033910751, 'timestamp': '2025-10-01 04:45:31.039973', 'step': 20311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:31.093494', 'step': 20311, 'epoch': 3} {'type': 'loss', 'content': 0.13243895769119263, 'timestamp': '2025-10-01 04:45:31.099122', 'step': 20312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:31.151528', 'step': 20312, 'epoch': 3} {'type': 'loss', 'content': 0.05485689267516136, 'timestamp': '2025-10-01 04:45:31.153500', 'step': 20313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:31.206553', 'step': 20313, 'epoch': 3} {'type': 'loss', 'content': 0.061660002917051315, 'timestamp': '2025-10-01 04:45:31.208614', 'step': 20314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:31.262191', 'step': 20314, 'epoch': 3} {'type': 'loss', 'content': 0.14756058156490326, 'timestamp': '2025-10-01 04:45:31.264339', 'step': 20315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:31.317827', 'step': 20315, 'epoch': 3} {'type': 'loss', 'content': 0.10195624083280563, 'timestamp': '2025-10-01 04:45:31.323443', 'step': 20316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:31.376501', 'step': 20316, 'epoch': 3} {'type': 'loss', 'content': 0.04655696451663971, 'timestamp': '2025-10-01 04:45:31.378527', 'step': 20317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:31.431675', 'step': 20317, 'epoch': 3} {'type': 'loss', 'content': 0.1371924728155136, 'timestamp': '2025-10-01 04:45:31.433722', 'step': 20318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:31.487642', 'step': 20318, 'epoch': 3} {'type': 'loss', 'content': 0.09271316230297089, 'timestamp': '2025-10-01 04:45:31.489936', 'step': 20319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:31.543262', 'step': 20319, 'epoch': 3} {'type': 'loss', 'content': 0.07947061210870743, 'timestamp': '2025-10-01 04:45:31.549005', 'step': 20320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:31.602573', 'step': 20320, 'epoch': 3} {'type': 'loss', 'content': 0.05434494465589523, 'timestamp': '2025-10-01 04:45:31.604539', 'step': 20321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:31.657681', 'step': 20321, 'epoch': 3} {'type': 'loss', 'content': 0.08703581243753433, 'timestamp': '2025-10-01 04:45:31.659690', 'step': 20322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:45:31.712675', 'step': 20322, 'epoch': 3} {'type': 'loss', 'content': 0.06094134971499443, 'timestamp': '2025-10-01 04:45:31.714668', 'step': 20323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:31.767603', 'step': 20323, 'epoch': 3} {'type': 'loss', 'content': 0.024985482916235924, 'timestamp': '2025-10-01 04:45:31.773204', 'step': 20324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:31.825842', 'step': 20324, 'epoch': 3} {'type': 'loss', 'content': 0.024570289999246597, 'timestamp': '2025-10-01 04:45:31.828961', 'step': 20325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:31.884473', 'step': 20325, 'epoch': 3} {'type': 'loss', 'content': 0.07461895793676376, 'timestamp': '2025-10-01 04:45:31.886491', 'step': 20326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:31.939187', 'step': 20326, 'epoch': 3} {'type': 'loss', 'content': 0.08818916976451874, 'timestamp': '2025-10-01 04:45:31.941189', 'step': 20327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:31.993712', 'step': 20327, 'epoch': 3} {'type': 'loss', 'content': 0.061666298657655716, 'timestamp': '2025-10-01 04:45:31.999099', 'step': 20328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:32.051919', 'step': 20328, 'epoch': 3} {'type': 'loss', 'content': 0.04805411398410797, 'timestamp': '2025-10-01 04:45:32.053953', 'step': 20329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:32.106535', 'step': 20329, 'epoch': 3} {'type': 'loss', 'content': 0.13790589570999146, 'timestamp': '2025-10-01 04:45:32.108701', 'step': 20330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:32.161696', 'step': 20330, 'epoch': 3} {'type': 'loss', 'content': 0.13040119409561157, 'timestamp': '2025-10-01 04:45:32.163883', 'step': 20331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:45:32.216999', 'step': 20331, 'epoch': 3} {'type': 'loss', 'content': 0.11020615696907043, 'timestamp': '2025-10-01 04:45:32.222507', 'step': 20332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:32.274651', 'step': 20332, 'epoch': 3} {'type': 'loss', 'content': 0.06398602575063705, 'timestamp': '2025-10-01 04:45:32.276585', 'step': 20333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:32.329651', 'step': 20333, 'epoch': 3} {'type': 'loss', 'content': 0.05006993189454079, 'timestamp': '2025-10-01 04:45:32.331746', 'step': 20334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:32.384994', 'step': 20334, 'epoch': 3} {'type': 'loss', 'content': 0.09502533823251724, 'timestamp': '2025-10-01 04:45:32.387067', 'step': 20335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:32.439994', 'step': 20335, 'epoch': 3} {'type': 'loss', 'content': 0.1788422167301178, 'timestamp': '2025-10-01 04:45:32.445278', 'step': 20336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:32.497072', 'step': 20336, 'epoch': 3} {'type': 'loss', 'content': 0.09138745069503784, 'timestamp': '2025-10-01 04:45:32.499206', 'step': 20337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:45:32.551773', 'step': 20337, 'epoch': 3} {'type': 'loss', 'content': 0.16357463598251343, 'timestamp': '2025-10-01 04:45:32.553559', 'step': 20338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:32.606801', 'step': 20338, 'epoch': 3} {'type': 'loss', 'content': 0.05572324991226196, 'timestamp': '2025-10-01 04:45:32.608876', 'step': 20339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:32.661877', 'step': 20339, 'epoch': 3} {'type': 'loss', 'content': 0.04987822473049164, 'timestamp': '2025-10-01 04:45:32.667329', 'step': 20340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:32.719884', 'step': 20340, 'epoch': 3} {'type': 'loss', 'content': 0.048660602420568466, 'timestamp': '2025-10-01 04:45:32.721675', 'step': 20341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:32.775038', 'step': 20341, 'epoch': 3} {'type': 'loss', 'content': 0.12216434627771378, 'timestamp': '2025-10-01 04:45:32.776835', 'step': 20342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:32.830370', 'step': 20342, 'epoch': 3} {'type': 'loss', 'content': 0.025830786675214767, 'timestamp': '2025-10-01 04:45:32.832211', 'step': 20343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:32.885384', 'step': 20343, 'epoch': 3} {'type': 'loss', 'content': 0.09577246755361557, 'timestamp': '2025-10-01 04:45:32.890631', 'step': 20344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:45:32.943106', 'step': 20344, 'epoch': 3} {'type': 'loss', 'content': 0.07094979286193848, 'timestamp': '2025-10-01 04:45:32.945211', 'step': 20345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:32.998180', 'step': 20345, 'epoch': 3} {'type': 'loss', 'content': 0.04172608628869057, 'timestamp': '2025-10-01 04:45:33.000889', 'step': 20346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:33.054027', 'step': 20346, 'epoch': 3} {'type': 'loss', 'content': 0.04939515143632889, 'timestamp': '2025-10-01 04:45:33.055993', 'step': 20347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:33.109060', 'step': 20347, 'epoch': 3} {'type': 'loss', 'content': 0.11861221492290497, 'timestamp': '2025-10-01 04:45:33.114680', 'step': 20348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:33.168120', 'step': 20348, 'epoch': 3} {'type': 'loss', 'content': 0.04822157695889473, 'timestamp': '2025-10-01 04:45:33.169873', 'step': 20349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:33.222992', 'step': 20349, 'epoch': 3} {'type': 'loss', 'content': 0.05262400582432747, 'timestamp': '2025-10-01 04:45:33.224779', 'step': 20350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:33.277719', 'step': 20350, 'epoch': 3} {'type': 'loss', 'content': 0.08285051584243774, 'timestamp': '2025-10-01 04:45:33.279859', 'step': 20351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:33.332704', 'step': 20351, 'epoch': 3} {'type': 'loss', 'content': 0.08484751731157303, 'timestamp': '2025-10-01 04:45:33.337960', 'step': 20352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:33.390458', 'step': 20352, 'epoch': 3} {'type': 'loss', 'content': 0.08574694395065308, 'timestamp': '2025-10-01 04:45:33.392227', 'step': 20353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:33.445235', 'step': 20353, 'epoch': 3} {'type': 'loss', 'content': 0.0895393118262291, 'timestamp': '2025-10-01 04:45:33.447342', 'step': 20354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:33.500813', 'step': 20354, 'epoch': 3} {'type': 'loss', 'content': 0.10108604282140732, 'timestamp': '2025-10-01 04:45:33.502712', 'step': 20355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:33.554720', 'step': 20355, 'epoch': 3} {'type': 'loss', 'content': 0.0775526612997055, 'timestamp': '2025-10-01 04:45:33.560197', 'step': 20356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:33.612562', 'step': 20356, 'epoch': 3} {'type': 'loss', 'content': 0.08616049587726593, 'timestamp': '2025-10-01 04:45:33.614602', 'step': 20357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:33.667046', 'step': 20357, 'epoch': 3} {'type': 'loss', 'content': 0.08995984494686127, 'timestamp': '2025-10-01 04:45:33.669009', 'step': 20358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:33.722433', 'step': 20358, 'epoch': 3} {'type': 'loss', 'content': 0.07236085832118988, 'timestamp': '2025-10-01 04:45:33.724623', 'step': 20359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:33.777070', 'step': 20359, 'epoch': 3} {'type': 'loss', 'content': 0.08035427331924438, 'timestamp': '2025-10-01 04:45:33.782671', 'step': 20360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:33.835524', 'step': 20360, 'epoch': 3} {'type': 'loss', 'content': 0.12323130667209625, 'timestamp': '2025-10-01 04:45:33.843684', 'step': 20361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:33.898446', 'step': 20361, 'epoch': 3} {'type': 'loss', 'content': 0.06083935126662254, 'timestamp': '2025-10-01 04:45:33.900552', 'step': 20362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:33.958200', 'step': 20362, 'epoch': 3} {'type': 'loss', 'content': 0.05027177557349205, 'timestamp': '2025-10-01 04:45:33.960203', 'step': 20363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:34.020022', 'step': 20363, 'epoch': 3} {'type': 'loss', 'content': 0.06470152735710144, 'timestamp': '2025-10-01 04:45:34.025473', 'step': 20364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:34.077170', 'step': 20364, 'epoch': 3} {'type': 'loss', 'content': 0.04201795160770416, 'timestamp': '2025-10-01 04:45:34.079046', 'step': 20365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:34.131834', 'step': 20365, 'epoch': 3} {'type': 'loss', 'content': 0.13271616399288177, 'timestamp': '2025-10-01 04:45:34.134502', 'step': 20366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:34.200797', 'step': 20366, 'epoch': 3} {'type': 'loss', 'content': 0.04345496743917465, 'timestamp': '2025-10-01 04:45:34.202797', 'step': 20367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:34.256177', 'step': 20367, 'epoch': 3} {'type': 'loss', 'content': 0.042099736630916595, 'timestamp': '2025-10-01 04:45:34.261461', 'step': 20368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:34.321876', 'step': 20368, 'epoch': 3} {'type': 'loss', 'content': 0.12600483000278473, 'timestamp': '2025-10-01 04:45:34.323788', 'step': 20369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:45:34.378113', 'step': 20369, 'epoch': 3} {'type': 'loss', 'content': 0.07202914357185364, 'timestamp': '2025-10-01 04:45:34.379862', 'step': 20370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:34.433890', 'step': 20370, 'epoch': 3} {'type': 'loss', 'content': 0.08148549497127533, 'timestamp': '2025-10-01 04:45:34.436060', 'step': 20371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:34.489701', 'step': 20371, 'epoch': 3} {'type': 'loss', 'content': 0.04962727427482605, 'timestamp': '2025-10-01 04:45:34.495397', 'step': 20372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:45:34.549116', 'step': 20372, 'epoch': 3} {'type': 'loss', 'content': 0.0332806222140789, 'timestamp': '2025-10-01 04:45:34.551239', 'step': 20373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:34.607519', 'step': 20373, 'epoch': 3} {'type': 'loss', 'content': 0.1368543952703476, 'timestamp': '2025-10-01 04:45:34.609551', 'step': 20374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:34.662910', 'step': 20374, 'epoch': 3} {'type': 'loss', 'content': 0.0924234390258789, 'timestamp': '2025-10-01 04:45:34.664814', 'step': 20375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:34.749860', 'step': 20375, 'epoch': 3} {'type': 'loss', 'content': 0.06948797404766083, 'timestamp': '2025-10-01 04:45:34.763187', 'step': 20376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:34.850443', 'step': 20376, 'epoch': 3} {'type': 'loss', 'content': 0.08480533212423325, 'timestamp': '2025-10-01 04:45:34.852554', 'step': 20377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:34.938134', 'step': 20377, 'epoch': 3} {'type': 'loss', 'content': 0.14958436787128448, 'timestamp': '2025-10-01 04:45:34.940254', 'step': 20378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:34.993697', 'step': 20378, 'epoch': 3} {'type': 'loss', 'content': 0.07156185060739517, 'timestamp': '2025-10-01 04:45:34.995916', 'step': 20379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:35.049372', 'step': 20379, 'epoch': 3} {'type': 'loss', 'content': 0.10359993577003479, 'timestamp': '2025-10-01 04:45:35.055288', 'step': 20380, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:45:35.108545', 'step': 20380, 'epoch': 3} {'type': 'loss', 'content': 0.058151114732027054, 'timestamp': '2025-10-01 04:45:35.110670', 'step': 20381, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:35.164033', 'step': 20381, 'epoch': 3} {'type': 'loss', 'content': 0.08539624512195587, 'timestamp': '2025-10-01 04:45:35.166099', 'step': 20382, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:35.220649', 'step': 20382, 'epoch': 3} {'type': 'loss', 'content': 0.02956678904592991, 'timestamp': '2025-10-01 04:45:35.223060', 'step': 20383, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:35.276544', 'step': 20383, 'epoch': 3} {'type': 'loss', 'content': 0.02713584341108799, 'timestamp': '2025-10-01 04:45:35.282204', 'step': 20384, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:35.334473', 'step': 20384, 'epoch': 3} {'type': 'loss', 'content': 0.05888737365603447, 'timestamp': '2025-10-01 04:45:35.336944', 'step': 20385, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:35.389785', 'step': 20385, 'epoch': 3} {'type': 'loss', 'content': 0.11434611678123474, 'timestamp': '2025-10-01 04:45:35.391995', 'step': 20386, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:35.446020', 'step': 20386, 'epoch': 3} {'type': 'loss', 'content': 0.10674333572387695, 'timestamp': '2025-10-01 04:45:35.448075', 'step': 20387, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:35.501348', 'step': 20387, 'epoch': 3} {'type': 'loss', 'content': 0.15305718779563904, 'timestamp': '2025-10-01 04:45:35.507067', 'step': 20388, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:45:35.559898', 'step': 20388, 'epoch': 3} {'type': 'loss', 'content': 0.034660615026950836, 'timestamp': '2025-10-01 04:45:35.561945', 'step': 20389, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:35.614747', 'step': 20389, 'epoch': 3} {'type': 'loss', 'content': 0.1796768605709076, 'timestamp': '2025-10-01 04:45:35.616889', 'step': 20390, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:35.673203', 'step': 20390, 'epoch': 3} {'type': 'loss', 'content': 0.10443951934576035, 'timestamp': '2025-10-01 04:45:35.675562', 'step': 20391, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:35.729380', 'step': 20391, 'epoch': 3} {'type': 'loss', 'content': 0.05010314658284187, 'timestamp': '2025-10-01 04:45:35.735282', 'step': 20392, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:35.790988', 'step': 20392, 'epoch': 3} {'type': 'loss', 'content': 0.08098949491977692, 'timestamp': '2025-10-01 04:45:35.793030', 'step': 20393, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:35.846109', 'step': 20393, 'epoch': 3} {'type': 'loss', 'content': 0.07647890597581863, 'timestamp': '2025-10-01 04:45:35.848103', 'step': 20394, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:35.901494', 'step': 20394, 'epoch': 3} {'type': 'loss', 'content': 0.05313139036297798, 'timestamp': '2025-10-01 04:45:35.903542', 'step': 20395, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:35.956551', 'step': 20395, 'epoch': 3} {'type': 'loss', 'content': 0.0649043545126915, 'timestamp': '2025-10-01 04:45:35.962906', 'step': 20396, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:36.019243', 'step': 20396, 'epoch': 3} {'type': 'loss', 'content': 0.1249552071094513, 'timestamp': '2025-10-01 04:45:36.021350', 'step': 20397, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:36.074210', 'step': 20397, 'epoch': 3} {'type': 'loss', 'content': 0.07841082662343979, 'timestamp': '2025-10-01 04:45:36.076281', 'step': 20398, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:36.130054', 'step': 20398, 'epoch': 3} {'type': 'loss', 'content': 0.10326921939849854, 'timestamp': '2025-10-01 04:45:36.132058', 'step': 20399, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:36.185575', 'step': 20399, 'epoch': 3} {'type': 'loss', 'content': 0.07764071226119995, 'timestamp': '2025-10-01 04:45:36.191396', 'step': 20400, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:36.244428', 'step': 20400, 'epoch': 3} {'type': 'loss', 'content': 0.11294510960578918, 'timestamp': '2025-10-01 04:45:36.246538', 'step': 20401, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:36.299867', 'step': 20401, 'epoch': 3} {'type': 'loss', 'content': 0.039113108068704605, 'timestamp': '2025-10-01 04:45:36.302020', 'step': 20402, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:36.355544', 'step': 20402, 'epoch': 3} {'type': 'loss', 'content': 0.052234478294849396, 'timestamp': '2025-10-01 04:45:36.357632', 'step': 20403, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:36.413111', 'step': 20403, 'epoch': 3} {'type': 'loss', 'content': 0.08104037493467331, 'timestamp': '2025-10-01 04:45:36.419178', 'step': 20404, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:36.471862', 'step': 20404, 'epoch': 3} {'type': 'loss', 'content': 0.1014552116394043, 'timestamp': '2025-10-01 04:45:36.474006', 'step': 20405, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:36.527145', 'step': 20405, 'epoch': 3} {'type': 'loss', 'content': 0.12605759501457214, 'timestamp': '2025-10-01 04:45:36.529570', 'step': 20406, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:36.582601', 'step': 20406, 'epoch': 3} {'type': 'loss', 'content': 0.05797269567847252, 'timestamp': '2025-10-01 04:45:36.584590', 'step': 20407, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:36.637844', 'step': 20407, 'epoch': 3} {'type': 'loss', 'content': 0.07871086150407791, 'timestamp': '2025-10-01 04:45:36.643622', 'step': 20408, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:36.697097', 'step': 20408, 'epoch': 3} {'type': 'loss', 'content': 0.05663250386714935, 'timestamp': '2025-10-01 04:45:36.699092', 'step': 20409, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:36.752206', 'step': 20409, 'epoch': 3} {'type': 'loss', 'content': 0.12108279764652252, 'timestamp': '2025-10-01 04:45:36.755951', 'step': 20410, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:36.809755', 'step': 20410, 'epoch': 3} {'type': 'loss', 'content': 0.023695945739746094, 'timestamp': '2025-10-01 04:45:36.811831', 'step': 20411, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:36.864869', 'step': 20411, 'epoch': 3} {'type': 'loss', 'content': 0.051072463393211365, 'timestamp': '2025-10-01 04:45:36.870740', 'step': 20412, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:36.922915', 'step': 20412, 'epoch': 3} {'type': 'loss', 'content': 0.07399560511112213, 'timestamp': '2025-10-01 04:45:36.924973', 'step': 20413, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:45:36.979490', 'step': 20413, 'epoch': 3} {'type': 'loss', 'content': 0.05659248307347298, 'timestamp': '2025-10-01 04:45:36.981456', 'step': 20414, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:37.034039', 'step': 20414, 'epoch': 3} {'type': 'loss', 'content': 0.13507544994354248, 'timestamp': '2025-10-01 04:45:37.036119', 'step': 20415, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:37.090147', 'step': 20415, 'epoch': 3} {'type': 'loss', 'content': 0.15443673729896545, 'timestamp': '2025-10-01 04:45:37.095853', 'step': 20416, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:37.148331', 'step': 20416, 'epoch': 3} {'type': 'loss', 'content': 0.08929965645074844, 'timestamp': '2025-10-01 04:45:37.150415', 'step': 20417, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:37.203964', 'step': 20417, 'epoch': 3} {'type': 'loss', 'content': 0.03411387652158737, 'timestamp': '2025-10-01 04:45:37.206628', 'step': 20418, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:37.260700', 'step': 20418, 'epoch': 3} {'type': 'loss', 'content': 0.08216754347085953, 'timestamp': '2025-10-01 04:45:37.263313', 'step': 20419, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:45:37.317590', 'step': 20419, 'epoch': 3} {'type': 'loss', 'content': 0.1180381178855896, 'timestamp': '2025-10-01 04:45:37.323701', 'step': 20420, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:37.376947', 'step': 20420, 'epoch': 3} {'type': 'loss', 'content': 0.04499499872326851, 'timestamp': '2025-10-01 04:45:37.379470', 'step': 20421, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:37.434163', 'step': 20421, 'epoch': 3} {'type': 'loss', 'content': 0.1217724084854126, 'timestamp': '2025-10-01 04:45:37.436475', 'step': 20422, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:37.490979', 'step': 20422, 'epoch': 3} {'type': 'loss', 'content': 0.0014047670410946012, 'timestamp': '2025-10-01 04:45:37.493312', 'step': 20423, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:37.547843', 'step': 20423, 'epoch': 3} {'type': 'loss', 'content': 0.08003315329551697, 'timestamp': '2025-10-01 04:45:37.553740', 'step': 20424, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:45:37.607566', 'step': 20424, 'epoch': 3} {'type': 'loss', 'content': 0.07396023720502853, 'timestamp': '2025-10-01 04:45:37.610148', 'step': 20425, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:45:37.664828', 'step': 20425, 'epoch': 3} {'type': 'loss', 'content': 0.13596929609775543, 'timestamp': '2025-10-01 04:45:37.667169', 'step': 20426, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:37.721378', 'step': 20426, 'epoch': 3} {'type': 'loss', 'content': 0.13828416168689728, 'timestamp': '2025-10-01 04:45:37.723863', 'step': 20427, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:37.777684', 'step': 20427, 'epoch': 3} {'type': 'loss', 'content': 0.09263865649700165, 'timestamp': '2025-10-01 04:45:37.783541', 'step': 20428, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:37.836996', 'step': 20428, 'epoch': 3} {'type': 'loss', 'content': 0.1117958128452301, 'timestamp': '2025-10-01 04:45:37.839391', 'step': 20429, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:37.893711', 'step': 20429, 'epoch': 3} {'type': 'loss', 'content': 0.10157566517591476, 'timestamp': '2025-10-01 04:45:37.895877', 'step': 20430, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:37.950406', 'step': 20430, 'epoch': 3} {'type': 'loss', 'content': 0.09324231743812561, 'timestamp': '2025-10-01 04:45:37.952514', 'step': 20431, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:38.006690', 'step': 20431, 'epoch': 3} {'type': 'loss', 'content': 0.08145390450954437, 'timestamp': '2025-10-01 04:45:38.012952', 'step': 20432, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:45:38.068498', 'step': 20432, 'epoch': 3} {'type': 'loss', 'content': 0.07582253217697144, 'timestamp': '2025-10-01 04:45:38.070779', 'step': 20433, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:38.124592', 'step': 20433, 'epoch': 3} {'type': 'loss', 'content': 0.045369237661361694, 'timestamp': '2025-10-01 04:45:38.127113', 'step': 20434, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:45:38.181886', 'step': 20434, 'epoch': 3} {'type': 'loss', 'content': 0.06202249974012375, 'timestamp': '2025-10-01 04:45:38.184060', 'step': 20435, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:38.238395', 'step': 20435, 'epoch': 3} {'type': 'loss', 'content': 0.03756380081176758, 'timestamp': '2025-10-01 04:45:38.244550', 'step': 20436, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:45:38.298554', 'step': 20436, 'epoch': 3} {'type': 'loss', 'content': 0.08463724702596664, 'timestamp': '2025-10-01 04:45:38.300917', 'step': 20437, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:38.354404', 'step': 20437, 'epoch': 3} {'type': 'loss', 'content': 0.12028402835130692, 'timestamp': '2025-10-01 04:45:38.356579', 'step': 20438, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:38.412223', 'step': 20438, 'epoch': 3} {'type': 'loss', 'content': 0.08584312349557877, 'timestamp': '2025-10-01 04:45:38.414477', 'step': 20439, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:38.469540', 'step': 20439, 'epoch': 3} {'type': 'loss', 'content': 0.041162360459566116, 'timestamp': '2025-10-01 04:45:38.475720', 'step': 20440, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:38.528983', 'step': 20440, 'epoch': 3} {'type': 'loss', 'content': 0.06804079562425613, 'timestamp': '2025-10-01 04:45:38.531145', 'step': 20441, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:38.585778', 'step': 20441, 'epoch': 3} {'type': 'loss', 'content': 0.05391855910420418, 'timestamp': '2025-10-01 04:45:38.588660', 'step': 20442, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:45:38.643134', 'step': 20442, 'epoch': 3} {'type': 'loss', 'content': 0.12164386361837387, 'timestamp': '2025-10-01 04:45:38.645613', 'step': 20443, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:38.702698', 'step': 20443, 'epoch': 3} {'type': 'loss', 'content': 0.06176326423883438, 'timestamp': '2025-10-01 04:45:38.709088', 'step': 20444, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:38.761635', 'step': 20444, 'epoch': 3} {'type': 'loss', 'content': 0.03224804997444153, 'timestamp': '2025-10-01 04:45:38.768001', 'step': 20445, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-01 04:45:38.822249', 'step': 20445, 'epoch': 3} {'type': 'loss', 'content': 0.174763485789299, 'timestamp': '2025-10-01 04:45:38.824329', 'step': 20446, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:45:38.877646', 'step': 20446, 'epoch': 3} {'type': 'loss', 'content': 0.07963064312934875, 'timestamp': '2025-10-01 04:45:38.879715', 'step': 20447, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:38.932306', 'step': 20447, 'epoch': 3} {'type': 'loss', 'content': 0.1197558045387268, 'timestamp': '2025-10-01 04:45:38.938109', 'step': 20448, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:38.991127', 'step': 20448, 'epoch': 3} {'type': 'loss', 'content': 0.05943348631262779, 'timestamp': '2025-10-01 04:45:38.993411', 'step': 20449, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:39.050733', 'step': 20449, 'epoch': 3} {'type': 'loss', 'content': 0.03326937556266785, 'timestamp': '2025-10-01 04:45:39.055983', 'step': 20450, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:39.110067', 'step': 20450, 'epoch': 3} {'type': 'loss', 'content': 0.13075047731399536, 'timestamp': '2025-10-01 04:45:39.119108', 'step': 20451, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:39.177262', 'step': 20451, 'epoch': 3} {'type': 'loss', 'content': 0.159587100148201, 'timestamp': '2025-10-01 04:45:39.190997', 'step': 20452, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:39.243994', 'step': 20452, 'epoch': 3} {'type': 'loss', 'content': 0.08264826983213425, 'timestamp': '2025-10-01 04:45:39.246102', 'step': 20453, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:39.300395', 'step': 20453, 'epoch': 3} {'type': 'loss', 'content': 0.13562992215156555, 'timestamp': '2025-10-01 04:45:39.303388', 'step': 20454, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:45:39.360114', 'step': 20454, 'epoch': 3} {'type': 'loss', 'content': 0.11595436185598373, 'timestamp': '2025-10-01 04:45:39.362477', 'step': 20455, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:39.416599', 'step': 20455, 'epoch': 3} {'type': 'loss', 'content': 0.10729017853736877, 'timestamp': '2025-10-01 04:45:39.422438', 'step': 20456, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:39.475651', 'step': 20456, 'epoch': 3} {'type': 'loss', 'content': 0.061907537281513214, 'timestamp': '2025-10-01 04:45:39.477967', 'step': 20457, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:39.538941', 'step': 20457, 'epoch': 3} {'type': 'loss', 'content': 0.08639606833457947, 'timestamp': '2025-10-01 04:45:39.540675', 'step': 20458, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:45:39.594072', 'step': 20458, 'epoch': 3} {'type': 'loss', 'content': 0.1325322836637497, 'timestamp': '2025-10-01 04:45:39.596437', 'step': 20459, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:45:39.652702', 'step': 20459, 'epoch': 3} {'type': 'loss', 'content': 0.07713168114423752, 'timestamp': '2025-10-01 04:45:39.658656', 'step': 20460, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:45:39.712409', 'step': 20460, 'epoch': 3} {'type': 'loss', 'content': 0.06354972720146179, 'timestamp': '2025-10-01 04:45:39.714648', 'step': 20461, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:39.767769', 'step': 20461, 'epoch': 3} {'type': 'loss', 'content': 0.07905220240354538, 'timestamp': '2025-10-01 04:45:39.769895', 'step': 20462, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:39.823037', 'step': 20462, 'epoch': 3} {'type': 'loss', 'content': 0.07629460841417313, 'timestamp': '2025-10-01 04:45:39.825180', 'step': 20463, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:39.878599', 'step': 20463, 'epoch': 3} {'type': 'loss', 'content': 0.08240644633769989, 'timestamp': '2025-10-01 04:45:39.884133', 'step': 20464, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:39.936376', 'step': 20464, 'epoch': 3} {'type': 'loss', 'content': 0.05343502387404442, 'timestamp': '2025-10-01 04:45:39.938233', 'step': 20465, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:39.991206', 'step': 20465, 'epoch': 3} {'type': 'loss', 'content': 0.09340124577283859, 'timestamp': '2025-10-01 04:45:39.993311', 'step': 20466, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:40.046954', 'step': 20466, 'epoch': 3} {'type': 'loss', 'content': 0.04968385398387909, 'timestamp': '2025-10-01 04:45:40.049007', 'step': 20467, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:40.102042', 'step': 20467, 'epoch': 3} {'type': 'loss', 'content': 0.09813317656517029, 'timestamp': '2025-10-01 04:45:40.107879', 'step': 20468, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:40.160349', 'step': 20468, 'epoch': 3} {'type': 'loss', 'content': 0.13820905983448029, 'timestamp': '2025-10-01 04:45:40.162378', 'step': 20469, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:40.215411', 'step': 20469, 'epoch': 3} {'type': 'loss', 'content': 0.08597113192081451, 'timestamp': '2025-10-01 04:45:40.217455', 'step': 20470, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:40.271420', 'step': 20470, 'epoch': 3} {'type': 'loss', 'content': 0.20438270270824432, 'timestamp': '2025-10-01 04:45:40.273171', 'step': 20471, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:40.326398', 'step': 20471, 'epoch': 3} {'type': 'loss', 'content': 0.1833772212266922, 'timestamp': '2025-10-01 04:45:40.331865', 'step': 20472, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:40.385002', 'step': 20472, 'epoch': 3} {'type': 'loss', 'content': 0.15523776412010193, 'timestamp': '2025-10-01 04:45:40.386663', 'step': 20473, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:40.440173', 'step': 20473, 'epoch': 3} {'type': 'loss', 'content': 0.10979297757148743, 'timestamp': '2025-10-01 04:45:40.442037', 'step': 20474, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:40.495527', 'step': 20474, 'epoch': 3} {'type': 'loss', 'content': 0.06461367756128311, 'timestamp': '2025-10-01 04:45:40.497717', 'step': 20475, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:40.550247', 'step': 20475, 'epoch': 3} {'type': 'loss', 'content': 0.05840222164988518, 'timestamp': '2025-10-01 04:45:40.556306', 'step': 20476, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:40.609457', 'step': 20476, 'epoch': 3} {'type': 'loss', 'content': 0.08797530829906464, 'timestamp': '2025-10-01 04:45:40.611597', 'step': 20477, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:45:40.665595', 'step': 20477, 'epoch': 3} {'type': 'loss', 'content': 0.07149103283882141, 'timestamp': '2025-10-01 04:45:40.667884', 'step': 20478, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:40.721582', 'step': 20478, 'epoch': 3} {'type': 'loss', 'content': 0.06733481585979462, 'timestamp': '2025-10-01 04:45:40.723323', 'step': 20479, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:40.776705', 'step': 20479, 'epoch': 3} {'type': 'loss', 'content': 0.05738174170255661, 'timestamp': '2025-10-01 04:45:40.782433', 'step': 20480, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:40.835206', 'step': 20480, 'epoch': 3} {'type': 'loss', 'content': 0.07322537153959274, 'timestamp': '2025-10-01 04:45:40.837444', 'step': 20481, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:40.890748', 'step': 20481, 'epoch': 3} {'type': 'loss', 'content': 0.01683671586215496, 'timestamp': '2025-10-01 04:45:40.892821', 'step': 20482, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:45:40.946306', 'step': 20482, 'epoch': 3} {'type': 'loss', 'content': 0.08346231281757355, 'timestamp': '2025-10-01 04:45:40.948352', 'step': 20483, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:45:41.002742', 'step': 20483, 'epoch': 3} {'type': 'loss', 'content': 0.03792254626750946, 'timestamp': '2025-10-01 04:45:41.008651', 'step': 20484, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:45:41.062507', 'step': 20484, 'epoch': 3} {'type': 'loss', 'content': 0.10227636992931366, 'timestamp': '2025-10-01 04:45:41.064379', 'step': 20485, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:41.121328', 'step': 20485, 'epoch': 3} {'type': 'loss', 'content': 0.2840211093425751, 'timestamp': '2025-10-01 04:45:41.123952', 'step': 20486, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:41.177790', 'step': 20486, 'epoch': 3} {'type': 'loss', 'content': 0.0696866437792778, 'timestamp': '2025-10-01 04:45:41.180239', 'step': 20487, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:45:41.236061', 'step': 20487, 'epoch': 3} {'type': 'loss', 'content': 0.06165570020675659, 'timestamp': '2025-10-01 04:45:41.242381', 'step': 20488, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:41.295613', 'step': 20488, 'epoch': 3} {'type': 'loss', 'content': 0.10548760741949081, 'timestamp': '2025-10-01 04:45:41.297665', 'step': 20489, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:41.350385', 'step': 20489, 'epoch': 3} {'type': 'loss', 'content': 0.09884171932935715, 'timestamp': '2025-10-01 04:45:41.360404', 'step': 20490, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:41.415955', 'step': 20490, 'epoch': 3} {'type': 'loss', 'content': 0.050544243305921555, 'timestamp': '2025-10-01 04:45:41.418142', 'step': 20491, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:45:41.474433', 'step': 20491, 'epoch': 3} {'type': 'loss', 'content': 0.0666843131184578, 'timestamp': '2025-10-01 04:45:41.483255', 'step': 20492, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:41.538604', 'step': 20492, 'epoch': 3} {'type': 'loss', 'content': 0.014818483032286167, 'timestamp': '2025-10-01 04:45:41.540254', 'step': 20493, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:41.602362', 'step': 20493, 'epoch': 3} {'type': 'loss', 'content': 0.09151259064674377, 'timestamp': '2025-10-01 04:45:41.604514', 'step': 20494, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:45:41.661365', 'step': 20494, 'epoch': 3} {'type': 'loss', 'content': 0.06373526155948639, 'timestamp': '2025-10-01 04:45:41.663398', 'step': 20495, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:41.721102', 'step': 20495, 'epoch': 3} {'type': 'loss', 'content': 0.02710292860865593, 'timestamp': '2025-10-01 04:45:41.727855', 'step': 20496, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:41.783586', 'step': 20496, 'epoch': 3} {'type': 'loss', 'content': 0.10353988409042358, 'timestamp': '2025-10-01 04:45:41.785672', 'step': 20497, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:41.841875', 'step': 20497, 'epoch': 3} {'type': 'loss', 'content': 0.06582890450954437, 'timestamp': '2025-10-01 04:45:41.843946', 'step': 20498, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:41.903196', 'step': 20498, 'epoch': 3} {'type': 'loss', 'content': 0.06493014842271805, 'timestamp': '2025-10-01 04:45:41.905601', 'step': 20499, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:45:41.959989', 'step': 20499, 'epoch': 3} {'type': 'loss', 'content': 0.1465933918952942, 'timestamp': '2025-10-01 04:45:41.965958', 'step': 20500, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 20500', 'timestamp': '2025-10-01 04:45:42.342810', 'step': 20500, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:42.400376', 'step': 20500, 'epoch': 3} {'type': 'loss', 'content': 0.06237657368183136, 'timestamp': '2025-10-01 04:45:42.402049', 'step': 20501, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:42.456175', 'step': 20501, 'epoch': 3} {'type': 'loss', 'content': 0.04079867899417877, 'timestamp': '2025-10-01 04:45:42.458149', 'step': 20502, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:42.511473', 'step': 20502, 'epoch': 3} {'type': 'loss', 'content': 0.10748913139104843, 'timestamp': '2025-10-01 04:45:42.513591', 'step': 20503, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:42.566615', 'step': 20503, 'epoch': 3} {'type': 'loss', 'content': 0.08819212764501572, 'timestamp': '2025-10-01 04:45:42.572601', 'step': 20504, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-01 04:45:55.508453', 'step': 20504, 'epoch': 3} {'type': 'pplx', 'content': 9788.505982290162, 'timestamp': '2025-10-01 04:45:55.511499', 'step': 20504, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:55.566663', 'step': 20504, 'epoch': 3} {'type': 'loss', 'content': 0.1371571570634842, 'timestamp': '2025-10-01 04:45:55.568788', 'step': 20505, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:55.622971', 'step': 20505, 'epoch': 3} {'type': 'loss', 'content': 0.020929209887981415, 'timestamp': '2025-10-01 04:45:55.625093', 'step': 20506, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:45:55.679079', 'step': 20506, 'epoch': 3} {'type': 'loss', 'content': 0.03815754875540733, 'timestamp': '2025-10-01 04:45:55.681144', 'step': 20507, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:55.734415', 'step': 20507, 'epoch': 3} {'type': 'loss', 'content': 0.05862155929207802, 'timestamp': '2025-10-01 04:45:55.740793', 'step': 20508, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:55.793454', 'step': 20508, 'epoch': 3} {'type': 'loss', 'content': 0.07428816705942154, 'timestamp': '2025-10-01 04:45:55.795375', 'step': 20509, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:55.848711', 'step': 20509, 'epoch': 3} {'type': 'loss', 'content': 0.06090717762708664, 'timestamp': '2025-10-01 04:45:55.850804', 'step': 20510, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:55.905741', 'step': 20510, 'epoch': 3} {'type': 'loss', 'content': 0.12344390898942947, 'timestamp': '2025-10-01 04:45:55.907794', 'step': 20511, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:45:55.961273', 'step': 20511, 'epoch': 3} {'type': 'loss', 'content': 0.060174185782670975, 'timestamp': '2025-10-01 04:45:55.967483', 'step': 20512, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:56.019905', 'step': 20512, 'epoch': 3} {'type': 'loss', 'content': 0.0980706438422203, 'timestamp': '2025-10-01 04:45:56.022307', 'step': 20513, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:56.075840', 'step': 20513, 'epoch': 3} {'type': 'loss', 'content': 0.08601141721010208, 'timestamp': '2025-10-01 04:45:56.078491', 'step': 20514, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:56.132187', 'step': 20514, 'epoch': 3} {'type': 'loss', 'content': 0.08292233198881149, 'timestamp': '2025-10-01 04:45:56.134248', 'step': 20515, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:45:56.187134', 'step': 20515, 'epoch': 3} {'type': 'loss', 'content': 0.06951188296079636, 'timestamp': '2025-10-01 04:45:56.192827', 'step': 20516, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:56.245601', 'step': 20516, 'epoch': 3} {'type': 'loss', 'content': 0.20770274102687836, 'timestamp': '2025-10-01 04:45:56.247677', 'step': 20517, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:56.300551', 'step': 20517, 'epoch': 3} {'type': 'loss', 'content': 0.07207680493593216, 'timestamp': '2025-10-01 04:45:56.302641', 'step': 20518, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:56.357348', 'step': 20518, 'epoch': 3} {'type': 'loss', 'content': 0.10556700080633163, 'timestamp': '2025-10-01 04:45:56.359406', 'step': 20519, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:56.427814', 'step': 20519, 'epoch': 3} {'type': 'loss', 'content': 0.08505203574895859, 'timestamp': '2025-10-01 04:45:56.434822', 'step': 20520, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:56.488627', 'step': 20520, 'epoch': 3} {'type': 'loss', 'content': 0.11407957971096039, 'timestamp': '2025-10-01 04:45:56.490629', 'step': 20521, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:56.544562', 'step': 20521, 'epoch': 3} {'type': 'loss', 'content': 0.12485308945178986, 'timestamp': '2025-10-01 04:45:56.546615', 'step': 20522, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:56.600405', 'step': 20522, 'epoch': 3} {'type': 'loss', 'content': 0.11629777401685715, 'timestamp': '2025-10-01 04:45:56.602570', 'step': 20523, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:56.657028', 'step': 20523, 'epoch': 3} {'type': 'loss', 'content': 0.06078134477138519, 'timestamp': '2025-10-01 04:45:56.663036', 'step': 20524, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:56.715522', 'step': 20524, 'epoch': 3} {'type': 'loss', 'content': 0.10596086084842682, 'timestamp': '2025-10-01 04:45:56.718183', 'step': 20525, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:56.772199', 'step': 20525, 'epoch': 3} {'type': 'loss', 'content': 0.04075908288359642, 'timestamp': '2025-10-01 04:45:56.775717', 'step': 20526, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:45:56.837328', 'step': 20526, 'epoch': 3} {'type': 'loss', 'content': 0.0873427465558052, 'timestamp': '2025-10-01 04:45:56.839354', 'step': 20527, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:56.892430', 'step': 20527, 'epoch': 3} {'type': 'loss', 'content': 0.12460359930992126, 'timestamp': '2025-10-01 04:45:56.898086', 'step': 20528, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:56.951142', 'step': 20528, 'epoch': 3} {'type': 'loss', 'content': 0.07091262191534042, 'timestamp': '2025-10-01 04:45:56.953475', 'step': 20529, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:45:57.007592', 'step': 20529, 'epoch': 3} {'type': 'loss', 'content': 0.06145303323864937, 'timestamp': '2025-10-01 04:45:57.009572', 'step': 20530, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:57.062576', 'step': 20530, 'epoch': 3} {'type': 'loss', 'content': 0.060803793370723724, 'timestamp': '2025-10-01 04:45:57.064737', 'step': 20531, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:45:57.118188', 'step': 20531, 'epoch': 3} {'type': 'loss', 'content': 0.052448976784944534, 'timestamp': '2025-10-01 04:45:57.123904', 'step': 20532, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:57.177174', 'step': 20532, 'epoch': 3} {'type': 'loss', 'content': 0.12153083831071854, 'timestamp': '2025-10-01 04:45:57.179272', 'step': 20533, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:57.232505', 'step': 20533, 'epoch': 3} {'type': 'loss', 'content': 0.08178121596574783, 'timestamp': '2025-10-01 04:45:57.234668', 'step': 20534, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:57.289067', 'step': 20534, 'epoch': 3} {'type': 'loss', 'content': 0.07658654451370239, 'timestamp': '2025-10-01 04:45:57.291358', 'step': 20535, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:57.345847', 'step': 20535, 'epoch': 3} {'type': 'loss', 'content': 0.050915203988552094, 'timestamp': '2025-10-01 04:45:57.352433', 'step': 20536, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:57.405615', 'step': 20536, 'epoch': 3} {'type': 'loss', 'content': 0.1078067198395729, 'timestamp': '2025-10-01 04:45:57.407842', 'step': 20537, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:57.461420', 'step': 20537, 'epoch': 3} {'type': 'loss', 'content': 0.08789515495300293, 'timestamp': '2025-10-01 04:45:57.463633', 'step': 20538, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:45:57.517182', 'step': 20538, 'epoch': 3} {'type': 'loss', 'content': 0.07009103894233704, 'timestamp': '2025-10-01 04:45:57.519262', 'step': 20539, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:57.572719', 'step': 20539, 'epoch': 3} {'type': 'loss', 'content': 0.11466781049966812, 'timestamp': '2025-10-01 04:45:57.578581', 'step': 20540, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:57.631520', 'step': 20540, 'epoch': 3} {'type': 'loss', 'content': 0.16322366893291473, 'timestamp': '2025-10-01 04:45:57.633651', 'step': 20541, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:45:57.690696', 'step': 20541, 'epoch': 3} {'type': 'loss', 'content': 0.0944509357213974, 'timestamp': '2025-10-01 04:45:57.693099', 'step': 20542, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:57.746662', 'step': 20542, 'epoch': 3} {'type': 'loss', 'content': 0.11427178233861923, 'timestamp': '2025-10-01 04:45:57.749004', 'step': 20543, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:57.802330', 'step': 20543, 'epoch': 3} {'type': 'loss', 'content': 0.1400987058877945, 'timestamp': '2025-10-01 04:45:57.808046', 'step': 20544, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:57.861055', 'step': 20544, 'epoch': 3} {'type': 'loss', 'content': 0.1304548680782318, 'timestamp': '2025-10-01 04:45:57.863898', 'step': 20545, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:57.925564', 'step': 20545, 'epoch': 3} {'type': 'loss', 'content': 0.1771746277809143, 'timestamp': '2025-10-01 04:45:57.927773', 'step': 20546, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:57.989188', 'step': 20546, 'epoch': 3} {'type': 'loss', 'content': 0.14615479111671448, 'timestamp': '2025-10-01 04:45:57.991285', 'step': 20547, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:58.044456', 'step': 20547, 'epoch': 3} {'type': 'loss', 'content': 0.10704261064529419, 'timestamp': '2025-10-01 04:45:58.050127', 'step': 20548, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:58.102759', 'step': 20548, 'epoch': 3} {'type': 'loss', 'content': 0.06622184067964554, 'timestamp': '2025-10-01 04:45:58.104988', 'step': 20549, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:58.159345', 'step': 20549, 'epoch': 3} {'type': 'loss', 'content': 0.10059085488319397, 'timestamp': '2025-10-01 04:45:58.161529', 'step': 20550, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:58.214463', 'step': 20550, 'epoch': 3} {'type': 'loss', 'content': 0.1098661944270134, 'timestamp': '2025-10-01 04:45:58.216495', 'step': 20551, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:58.269179', 'step': 20551, 'epoch': 3} {'type': 'loss', 'content': 0.06590766459703445, 'timestamp': '2025-10-01 04:45:58.274888', 'step': 20552, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:58.327704', 'step': 20552, 'epoch': 3} {'type': 'loss', 'content': 0.12252770364284515, 'timestamp': '2025-10-01 04:45:58.329743', 'step': 20553, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:58.383014', 'step': 20553, 'epoch': 3} {'type': 'loss', 'content': 0.05640771612524986, 'timestamp': '2025-10-01 04:45:58.385232', 'step': 20554, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:58.438427', 'step': 20554, 'epoch': 3} {'type': 'loss', 'content': 0.13322992622852325, 'timestamp': '2025-10-01 04:45:58.440668', 'step': 20555, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:58.494010', 'step': 20555, 'epoch': 3} {'type': 'loss', 'content': 0.11088679730892181, 'timestamp': '2025-10-01 04:45:58.500719', 'step': 20556, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:58.559017', 'step': 20556, 'epoch': 3} {'type': 'loss', 'content': 0.06550945341587067, 'timestamp': '2025-10-01 04:45:58.561416', 'step': 20557, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:45:58.614963', 'step': 20557, 'epoch': 3} {'type': 'loss', 'content': 0.08621614426374435, 'timestamp': '2025-10-01 04:45:58.617056', 'step': 20558, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:58.670073', 'step': 20558, 'epoch': 3} {'type': 'loss', 'content': 0.14361581206321716, 'timestamp': '2025-10-01 04:45:58.672189', 'step': 20559, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:45:58.725694', 'step': 20559, 'epoch': 3} {'type': 'loss', 'content': 0.04468054324388504, 'timestamp': '2025-10-01 04:45:58.731502', 'step': 20560, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:58.783997', 'step': 20560, 'epoch': 3} {'type': 'loss', 'content': 0.10314454883337021, 'timestamp': '2025-10-01 04:45:58.786174', 'step': 20561, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:45:58.839242', 'step': 20561, 'epoch': 3} {'type': 'loss', 'content': 0.21802791953086853, 'timestamp': '2025-10-01 04:45:58.841379', 'step': 20562, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:45:58.894686', 'step': 20562, 'epoch': 3} {'type': 'loss', 'content': 0.08226487785577774, 'timestamp': '2025-10-01 04:45:58.896880', 'step': 20563, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:58.963089', 'step': 20563, 'epoch': 3} {'type': 'loss', 'content': 0.12080506235361099, 'timestamp': '2025-10-01 04:45:58.969857', 'step': 20564, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:59.022282', 'step': 20564, 'epoch': 3} {'type': 'loss', 'content': 0.10665284097194672, 'timestamp': '2025-10-01 04:45:59.024382', 'step': 20565, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:59.077913', 'step': 20565, 'epoch': 3} {'type': 'loss', 'content': 0.08970851451158524, 'timestamp': '2025-10-01 04:45:59.080089', 'step': 20566, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:59.133814', 'step': 20566, 'epoch': 3} {'type': 'loss', 'content': 0.03588990867137909, 'timestamp': '2025-10-01 04:45:59.135954', 'step': 20567, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:45:59.190240', 'step': 20567, 'epoch': 3} {'type': 'loss', 'content': 0.12622202932834625, 'timestamp': '2025-10-01 04:45:59.195985', 'step': 20568, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:59.249330', 'step': 20568, 'epoch': 3} {'type': 'loss', 'content': 0.08194630593061447, 'timestamp': '2025-10-01 04:45:59.251434', 'step': 20569, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:59.304640', 'step': 20569, 'epoch': 3} {'type': 'loss', 'content': 0.0798717513680458, 'timestamp': '2025-10-01 04:45:59.306607', 'step': 20570, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:59.360130', 'step': 20570, 'epoch': 3} {'type': 'loss', 'content': 0.08362125605344772, 'timestamp': '2025-10-01 04:45:59.362389', 'step': 20571, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:59.416885', 'step': 20571, 'epoch': 3} {'type': 'loss', 'content': 0.020794440060853958, 'timestamp': '2025-10-01 04:45:59.423136', 'step': 20572, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:59.476895', 'step': 20572, 'epoch': 3} {'type': 'loss', 'content': 0.05058518797159195, 'timestamp': '2025-10-01 04:45:59.478955', 'step': 20573, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:45:59.533004', 'step': 20573, 'epoch': 3} {'type': 'loss', 'content': 0.10546315461397171, 'timestamp': '2025-10-01 04:45:59.535128', 'step': 20574, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:59.590048', 'step': 20574, 'epoch': 3} {'type': 'loss', 'content': 0.11216691881418228, 'timestamp': '2025-10-01 04:45:59.592233', 'step': 20575, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:59.646682', 'step': 20575, 'epoch': 3} {'type': 'loss', 'content': 0.12529350817203522, 'timestamp': '2025-10-01 04:45:59.652941', 'step': 20576, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:59.706292', 'step': 20576, 'epoch': 3} {'type': 'loss', 'content': 0.061468277126550674, 'timestamp': '2025-10-01 04:45:59.708279', 'step': 20577, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:45:59.761685', 'step': 20577, 'epoch': 3} {'type': 'loss', 'content': 0.17113079130649567, 'timestamp': '2025-10-01 04:45:59.763783', 'step': 20578, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:59.817839', 'step': 20578, 'epoch': 3} {'type': 'loss', 'content': 0.06719571352005005, 'timestamp': '2025-10-01 04:45:59.819777', 'step': 20579, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:45:59.875897', 'step': 20579, 'epoch': 3} {'type': 'loss', 'content': 0.0596776083111763, 'timestamp': '2025-10-01 04:45:59.881913', 'step': 20580, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:45:59.947716', 'step': 20580, 'epoch': 3} {'type': 'loss', 'content': 0.060921069234609604, 'timestamp': '2025-10-01 04:45:59.950663', 'step': 20581, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:00.014956', 'step': 20581, 'epoch': 3} {'type': 'loss', 'content': 0.08517378568649292, 'timestamp': '2025-10-01 04:46:00.017465', 'step': 20582, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:00.070718', 'step': 20582, 'epoch': 3} {'type': 'loss', 'content': 0.05738668143749237, 'timestamp': '2025-10-01 04:46:00.072865', 'step': 20583, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:00.125891', 'step': 20583, 'epoch': 3} {'type': 'loss', 'content': 0.04827355593442917, 'timestamp': '2025-10-01 04:46:00.131703', 'step': 20584, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:00.184373', 'step': 20584, 'epoch': 3} {'type': 'loss', 'content': 0.031996216624975204, 'timestamp': '2025-10-01 04:46:00.186603', 'step': 20585, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:46:00.239717', 'step': 20585, 'epoch': 3} {'type': 'loss', 'content': 0.04907780885696411, 'timestamp': '2025-10-01 04:46:00.242134', 'step': 20586, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:00.296224', 'step': 20586, 'epoch': 3} {'type': 'loss', 'content': 0.04858807101845741, 'timestamp': '2025-10-01 04:46:00.298386', 'step': 20587, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:00.351896', 'step': 20587, 'epoch': 3} {'type': 'loss', 'content': 0.05686870962381363, 'timestamp': '2025-10-01 04:46:00.358574', 'step': 20588, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:46:00.411204', 'step': 20588, 'epoch': 3} {'type': 'loss', 'content': 0.030251365154981613, 'timestamp': '2025-10-01 04:46:00.413357', 'step': 20589, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:00.466008', 'step': 20589, 'epoch': 3} {'type': 'loss', 'content': 0.04412127286195755, 'timestamp': '2025-10-01 04:46:00.468147', 'step': 20590, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:00.523157', 'step': 20590, 'epoch': 3} {'type': 'loss', 'content': 0.09360825270414352, 'timestamp': '2025-10-01 04:46:00.525266', 'step': 20591, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:00.579104', 'step': 20591, 'epoch': 3} {'type': 'loss', 'content': 0.09925615787506104, 'timestamp': '2025-10-01 04:46:00.584834', 'step': 20592, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:46:00.638712', 'step': 20592, 'epoch': 3} {'type': 'loss', 'content': 0.1377459019422531, 'timestamp': '2025-10-01 04:46:00.640871', 'step': 20593, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:00.703179', 'step': 20593, 'epoch': 3} {'type': 'loss', 'content': 0.058917950838804245, 'timestamp': '2025-10-01 04:46:00.705482', 'step': 20594, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:00.759422', 'step': 20594, 'epoch': 3} {'type': 'loss', 'content': 0.07342066615819931, 'timestamp': '2025-10-01 04:46:00.761231', 'step': 20595, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:00.814371', 'step': 20595, 'epoch': 3} {'type': 'loss', 'content': 0.10882314294576645, 'timestamp': '2025-10-01 04:46:00.820319', 'step': 20596, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:00.874606', 'step': 20596, 'epoch': 3} {'type': 'loss', 'content': 0.06820759177207947, 'timestamp': '2025-10-01 04:46:00.876726', 'step': 20597, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:00.930336', 'step': 20597, 'epoch': 3} {'type': 'loss', 'content': 0.03948727250099182, 'timestamp': '2025-10-01 04:46:00.932426', 'step': 20598, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:00.985959', 'step': 20598, 'epoch': 3} {'type': 'loss', 'content': 0.05235568434000015, 'timestamp': '2025-10-01 04:46:00.988144', 'step': 20599, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:01.043637', 'step': 20599, 'epoch': 3} {'type': 'loss', 'content': 0.10130626708269119, 'timestamp': '2025-10-01 04:46:01.055252', 'step': 20600, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:01.108194', 'step': 20600, 'epoch': 3} {'type': 'loss', 'content': 0.11295613646507263, 'timestamp': '2025-10-01 04:46:01.123132', 'step': 20601, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:01.176914', 'step': 20601, 'epoch': 3} {'type': 'loss', 'content': 0.12432688474655151, 'timestamp': '2025-10-01 04:46:01.178969', 'step': 20602, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:46:01.232405', 'step': 20602, 'epoch': 3} {'type': 'loss', 'content': 0.07292252779006958, 'timestamp': '2025-10-01 04:46:01.234522', 'step': 20603, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:01.288156', 'step': 20603, 'epoch': 3} {'type': 'loss', 'content': 0.18185147643089294, 'timestamp': '2025-10-01 04:46:01.297940', 'step': 20604, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:01.352011', 'step': 20604, 'epoch': 3} {'type': 'loss', 'content': 0.06464343518018723, 'timestamp': '2025-10-01 04:46:01.354099', 'step': 20605, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:01.408012', 'step': 20605, 'epoch': 3} {'type': 'loss', 'content': 0.11607163399457932, 'timestamp': '2025-10-01 04:46:01.418352', 'step': 20606, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:01.480688', 'step': 20606, 'epoch': 3} {'type': 'loss', 'content': 0.12082262337207794, 'timestamp': '2025-10-01 04:46:01.482783', 'step': 20607, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:01.536208', 'step': 20607, 'epoch': 3} {'type': 'loss', 'content': 0.09846369177103043, 'timestamp': '2025-10-01 04:46:01.542846', 'step': 20608, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:01.598032', 'step': 20608, 'epoch': 3} {'type': 'loss', 'content': 0.09333567321300507, 'timestamp': '2025-10-01 04:46:01.600337', 'step': 20609, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:01.656067', 'step': 20609, 'epoch': 3} {'type': 'loss', 'content': 0.12488549202680588, 'timestamp': '2025-10-01 04:46:01.658558', 'step': 20610, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:01.713684', 'step': 20610, 'epoch': 3} {'type': 'loss', 'content': 0.061370570212602615, 'timestamp': '2025-10-01 04:46:01.716222', 'step': 20611, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:01.770643', 'step': 20611, 'epoch': 3} {'type': 'loss', 'content': 0.05965498462319374, 'timestamp': '2025-10-01 04:46:01.778465', 'step': 20612, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:46:01.832979', 'step': 20612, 'epoch': 3} {'type': 'loss', 'content': 0.05244022607803345, 'timestamp': '2025-10-01 04:46:01.835539', 'step': 20613, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:01.891359', 'step': 20613, 'epoch': 3} {'type': 'loss', 'content': 0.13753600418567657, 'timestamp': '2025-10-01 04:46:01.894373', 'step': 20614, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:01.954546', 'step': 20614, 'epoch': 3} {'type': 'loss', 'content': 0.0954693928360939, 'timestamp': '2025-10-01 04:46:01.957063', 'step': 20615, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:02.010699', 'step': 20615, 'epoch': 3} {'type': 'loss', 'content': 0.13393999636173248, 'timestamp': '2025-10-01 04:46:02.017060', 'step': 20616, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:02.070894', 'step': 20616, 'epoch': 3} {'type': 'loss', 'content': 0.04366675019264221, 'timestamp': '2025-10-01 04:46:02.073375', 'step': 20617, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:02.127183', 'step': 20617, 'epoch': 3} {'type': 'loss', 'content': 0.08455641567707062, 'timestamp': '2025-10-01 04:46:02.129795', 'step': 20618, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:02.183865', 'step': 20618, 'epoch': 3} {'type': 'loss', 'content': 0.055349256843328476, 'timestamp': '2025-10-01 04:46:02.186418', 'step': 20619, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:02.240436', 'step': 20619, 'epoch': 3} {'type': 'loss', 'content': 0.03535398468375206, 'timestamp': '2025-10-01 04:46:02.252932', 'step': 20620, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:02.305823', 'step': 20620, 'epoch': 3} {'type': 'loss', 'content': 0.13711023330688477, 'timestamp': '2025-10-01 04:46:02.308005', 'step': 20621, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:02.369607', 'step': 20621, 'epoch': 3} {'type': 'loss', 'content': 0.07490456849336624, 'timestamp': '2025-10-01 04:46:02.372076', 'step': 20622, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:02.429108', 'step': 20622, 'epoch': 3} {'type': 'loss', 'content': 0.09062802046537399, 'timestamp': '2025-10-01 04:46:02.431550', 'step': 20623, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:02.487362', 'step': 20623, 'epoch': 3} {'type': 'loss', 'content': 0.05050704628229141, 'timestamp': '2025-10-01 04:46:02.494085', 'step': 20624, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:02.550846', 'step': 20624, 'epoch': 3} {'type': 'loss', 'content': 0.09579954296350479, 'timestamp': '2025-10-01 04:46:02.553259', 'step': 20625, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:02.608884', 'step': 20625, 'epoch': 3} {'type': 'loss', 'content': 0.053987689316272736, 'timestamp': '2025-10-01 04:46:02.611226', 'step': 20626, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:46:02.668053', 'step': 20626, 'epoch': 3} {'type': 'loss', 'content': 0.10052751004695892, 'timestamp': '2025-10-01 04:46:02.670768', 'step': 20627, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:02.727122', 'step': 20627, 'epoch': 3} {'type': 'loss', 'content': 0.08423362672328949, 'timestamp': '2025-10-01 04:46:02.736706', 'step': 20628, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:02.818449', 'step': 20628, 'epoch': 3} {'type': 'loss', 'content': 0.10791701078414917, 'timestamp': '2025-10-01 04:46:02.825881', 'step': 20629, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:02.882490', 'step': 20629, 'epoch': 3} {'type': 'loss', 'content': 0.07173503935337067, 'timestamp': '2025-10-01 04:46:02.884987', 'step': 20630, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:02.941456', 'step': 20630, 'epoch': 3} {'type': 'loss', 'content': 0.12471086531877518, 'timestamp': '2025-10-01 04:46:02.943701', 'step': 20631, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:02.998684', 'step': 20631, 'epoch': 3} {'type': 'loss', 'content': 0.026340629905462265, 'timestamp': '2025-10-01 04:46:03.005061', 'step': 20632, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:03.058282', 'step': 20632, 'epoch': 3} {'type': 'loss', 'content': 0.047347161918878555, 'timestamp': '2025-10-01 04:46:03.060798', 'step': 20633, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:03.114681', 'step': 20633, 'epoch': 3} {'type': 'loss', 'content': 0.11741384118795395, 'timestamp': '2025-10-01 04:46:03.117364', 'step': 20634, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:03.176981', 'step': 20634, 'epoch': 3} {'type': 'loss', 'content': 0.05141054466366768, 'timestamp': '2025-10-01 04:46:03.179275', 'step': 20635, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:03.238994', 'step': 20635, 'epoch': 3} {'type': 'loss', 'content': 0.03137319162487984, 'timestamp': '2025-10-01 04:46:03.246275', 'step': 20636, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:03.302595', 'step': 20636, 'epoch': 3} {'type': 'loss', 'content': 0.08476263284683228, 'timestamp': '2025-10-01 04:46:03.304842', 'step': 20637, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:03.358835', 'step': 20637, 'epoch': 3} {'type': 'loss', 'content': 0.08921346813440323, 'timestamp': '2025-10-01 04:46:03.360612', 'step': 20638, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:03.413562', 'step': 20638, 'epoch': 3} {'type': 'loss', 'content': 0.07479318976402283, 'timestamp': '2025-10-01 04:46:03.415700', 'step': 20639, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:03.480390', 'step': 20639, 'epoch': 3} {'type': 'loss', 'content': 0.13946618139743805, 'timestamp': '2025-10-01 04:46:03.486345', 'step': 20640, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:03.538908', 'step': 20640, 'epoch': 3} {'type': 'loss', 'content': 0.05735547095537186, 'timestamp': '2025-10-01 04:46:03.541004', 'step': 20641, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:03.594425', 'step': 20641, 'epoch': 3} {'type': 'loss', 'content': 0.10027667880058289, 'timestamp': '2025-10-01 04:46:03.596751', 'step': 20642, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:03.650221', 'step': 20642, 'epoch': 3} {'type': 'loss', 'content': 0.09410412609577179, 'timestamp': '2025-10-01 04:46:03.652275', 'step': 20643, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:03.704948', 'step': 20643, 'epoch': 3} {'type': 'loss', 'content': 0.04333994910120964, 'timestamp': '2025-10-01 04:46:03.710775', 'step': 20644, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:03.763281', 'step': 20644, 'epoch': 3} {'type': 'loss', 'content': 0.03595931455492973, 'timestamp': '2025-10-01 04:46:03.765598', 'step': 20645, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:03.818761', 'step': 20645, 'epoch': 3} {'type': 'loss', 'content': 0.10796593874692917, 'timestamp': '2025-10-01 04:46:03.821114', 'step': 20646, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:03.874275', 'step': 20646, 'epoch': 3} {'type': 'loss', 'content': 0.08058597892522812, 'timestamp': '2025-10-01 04:46:03.876339', 'step': 20647, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:03.929237', 'step': 20647, 'epoch': 3} {'type': 'loss', 'content': 0.04907943680882454, 'timestamp': '2025-10-01 04:46:03.935009', 'step': 20648, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:03.988361', 'step': 20648, 'epoch': 3} {'type': 'loss', 'content': 0.06648735702037811, 'timestamp': '2025-10-01 04:46:03.990498', 'step': 20649, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:04.043180', 'step': 20649, 'epoch': 3} {'type': 'loss', 'content': 0.11322946101427078, 'timestamp': '2025-10-01 04:46:04.045274', 'step': 20650, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:04.107383', 'step': 20650, 'epoch': 3} {'type': 'loss', 'content': 0.040183406323194504, 'timestamp': '2025-10-01 04:46:04.109544', 'step': 20651, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:04.163047', 'step': 20651, 'epoch': 3} {'type': 'loss', 'content': 0.08158058673143387, 'timestamp': '2025-10-01 04:46:04.168748', 'step': 20652, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:04.233321', 'step': 20652, 'epoch': 3} {'type': 'loss', 'content': 0.06235981732606888, 'timestamp': '2025-10-01 04:46:04.235445', 'step': 20653, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:04.288852', 'step': 20653, 'epoch': 3} {'type': 'loss', 'content': 0.05692848935723305, 'timestamp': '2025-10-01 04:46:04.290978', 'step': 20654, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:04.344902', 'step': 20654, 'epoch': 3} {'type': 'loss', 'content': 0.05634240433573723, 'timestamp': '2025-10-01 04:46:04.347049', 'step': 20655, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:04.401788', 'step': 20655, 'epoch': 3} {'type': 'loss', 'content': 0.11986242979764938, 'timestamp': '2025-10-01 04:46:04.415959', 'step': 20656, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:04.481667', 'step': 20656, 'epoch': 3} {'type': 'loss', 'content': 0.06343920528888702, 'timestamp': '2025-10-01 04:46:04.484318', 'step': 20657, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:04.551804', 'step': 20657, 'epoch': 3} {'type': 'loss', 'content': 0.05570026859641075, 'timestamp': '2025-10-01 04:46:04.554098', 'step': 20658, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:04.606882', 'step': 20658, 'epoch': 3} {'type': 'loss', 'content': 0.07679347693920135, 'timestamp': '2025-10-01 04:46:04.609172', 'step': 20659, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:04.672481', 'step': 20659, 'epoch': 3} {'type': 'loss', 'content': 0.095596082508564, 'timestamp': '2025-10-01 04:46:04.678833', 'step': 20660, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:04.732874', 'step': 20660, 'epoch': 3} {'type': 'loss', 'content': 0.038857053965330124, 'timestamp': '2025-10-01 04:46:04.735097', 'step': 20661, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:46:04.787973', 'step': 20661, 'epoch': 3} {'type': 'loss', 'content': 0.06656494736671448, 'timestamp': '2025-10-01 04:46:04.790038', 'step': 20662, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:04.843061', 'step': 20662, 'epoch': 3} {'type': 'loss', 'content': 0.07227840274572372, 'timestamp': '2025-10-01 04:46:04.845130', 'step': 20663, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:04.910302', 'step': 20663, 'epoch': 3} {'type': 'loss', 'content': 0.07813792675733566, 'timestamp': '2025-10-01 04:46:04.915929', 'step': 20664, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:04.968437', 'step': 20664, 'epoch': 3} {'type': 'loss', 'content': 0.08490225672721863, 'timestamp': '2025-10-01 04:46:04.972345', 'step': 20665, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:05.030966', 'step': 20665, 'epoch': 3} {'type': 'loss', 'content': 0.12239988893270493, 'timestamp': '2025-10-01 04:46:05.033088', 'step': 20666, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:05.086145', 'step': 20666, 'epoch': 3} {'type': 'loss', 'content': 0.0581994466483593, 'timestamp': '2025-10-01 04:46:05.092626', 'step': 20667, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:05.146953', 'step': 20667, 'epoch': 3} {'type': 'loss', 'content': 0.08155489712953568, 'timestamp': '2025-10-01 04:46:05.153392', 'step': 20668, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:05.205712', 'step': 20668, 'epoch': 3} {'type': 'loss', 'content': 0.08603664487600327, 'timestamp': '2025-10-01 04:46:05.207901', 'step': 20669, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:05.260916', 'step': 20669, 'epoch': 3} {'type': 'loss', 'content': 0.07435108721256256, 'timestamp': '2025-10-01 04:46:05.263008', 'step': 20670, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:05.315967', 'step': 20670, 'epoch': 3} {'type': 'loss', 'content': 0.09362245351076126, 'timestamp': '2025-10-01 04:46:05.319375', 'step': 20671, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:05.379471', 'step': 20671, 'epoch': 3} {'type': 'loss', 'content': 0.03680713474750519, 'timestamp': '2025-10-01 04:46:05.385230', 'step': 20672, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:05.438044', 'step': 20672, 'epoch': 3} {'type': 'loss', 'content': 0.0361286886036396, 'timestamp': '2025-10-01 04:46:05.440213', 'step': 20673, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:05.494468', 'step': 20673, 'epoch': 3} {'type': 'loss', 'content': 0.06535107642412186, 'timestamp': '2025-10-01 04:46:05.499351', 'step': 20674, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:05.564756', 'step': 20674, 'epoch': 3} {'type': 'loss', 'content': 0.10736392438411713, 'timestamp': '2025-10-01 04:46:05.566863', 'step': 20675, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:05.624685', 'step': 20675, 'epoch': 3} {'type': 'loss', 'content': 0.04930581524968147, 'timestamp': '2025-10-01 04:46:05.630275', 'step': 20676, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:05.682661', 'step': 20676, 'epoch': 3} {'type': 'loss', 'content': 0.113116554915905, 'timestamp': '2025-10-01 04:46:05.684947', 'step': 20677, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:05.753871', 'step': 20677, 'epoch': 3} {'type': 'loss', 'content': 0.14648142457008362, 'timestamp': '2025-10-01 04:46:05.756022', 'step': 20678, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:05.812464', 'step': 20678, 'epoch': 3} {'type': 'loss', 'content': 0.09496743232011795, 'timestamp': '2025-10-01 04:46:05.815213', 'step': 20679, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:05.868742', 'step': 20679, 'epoch': 3} {'type': 'loss', 'content': 0.06226908788084984, 'timestamp': '2025-10-01 04:46:05.874331', 'step': 20680, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:05.927022', 'step': 20680, 'epoch': 3} {'type': 'loss', 'content': 0.07313401997089386, 'timestamp': '2025-10-01 04:46:05.929141', 'step': 20681, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:05.981880', 'step': 20681, 'epoch': 3} {'type': 'loss', 'content': 0.0608675554394722, 'timestamp': '2025-10-01 04:46:05.983994', 'step': 20682, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:06.036778', 'step': 20682, 'epoch': 3} {'type': 'loss', 'content': 0.027926968410611153, 'timestamp': '2025-10-01 04:46:06.038837', 'step': 20683, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:06.091636', 'step': 20683, 'epoch': 3} {'type': 'loss', 'content': 0.1532214730978012, 'timestamp': '2025-10-01 04:46:06.097256', 'step': 20684, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:06.151009', 'step': 20684, 'epoch': 3} {'type': 'loss', 'content': 0.17343027889728546, 'timestamp': '2025-10-01 04:46:06.157107', 'step': 20685, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:46:06.215683', 'step': 20685, 'epoch': 3} {'type': 'loss', 'content': 0.09507207572460175, 'timestamp': '2025-10-01 04:46:06.218383', 'step': 20686, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:06.271420', 'step': 20686, 'epoch': 3} {'type': 'loss', 'content': 0.0833725780248642, 'timestamp': '2025-10-01 04:46:06.273746', 'step': 20687, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:06.327381', 'step': 20687, 'epoch': 3} {'type': 'loss', 'content': 0.15715092420578003, 'timestamp': '2025-10-01 04:46:06.333102', 'step': 20688, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:06.386421', 'step': 20688, 'epoch': 3} {'type': 'loss', 'content': 0.0996663048863411, 'timestamp': '2025-10-01 04:46:06.389094', 'step': 20689, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:46:06.442115', 'step': 20689, 'epoch': 3} {'type': 'loss', 'content': 0.04721904173493385, 'timestamp': '2025-10-01 04:46:06.444207', 'step': 20690, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:06.502803', 'step': 20690, 'epoch': 3} {'type': 'loss', 'content': 0.1455245465040207, 'timestamp': '2025-10-01 04:46:06.504869', 'step': 20691, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:06.559589', 'step': 20691, 'epoch': 3} {'type': 'loss', 'content': 0.19482733309268951, 'timestamp': '2025-10-01 04:46:06.565230', 'step': 20692, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:06.618071', 'step': 20692, 'epoch': 3} {'type': 'loss', 'content': 0.06901834905147552, 'timestamp': '2025-10-01 04:46:06.620175', 'step': 20693, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:06.672870', 'step': 20693, 'epoch': 3} {'type': 'loss', 'content': 0.024291066452860832, 'timestamp': '2025-10-01 04:46:06.674964', 'step': 20694, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:06.728148', 'step': 20694, 'epoch': 3} {'type': 'loss', 'content': 0.08313902467489243, 'timestamp': '2025-10-01 04:46:06.730222', 'step': 20695, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:06.783262', 'step': 20695, 'epoch': 3} {'type': 'loss', 'content': 0.07659151405096054, 'timestamp': '2025-10-01 04:46:06.788939', 'step': 20696, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:06.841487', 'step': 20696, 'epoch': 3} {'type': 'loss', 'content': 0.09544913470745087, 'timestamp': '2025-10-01 04:46:06.843919', 'step': 20697, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:06.898251', 'step': 20697, 'epoch': 3} {'type': 'loss', 'content': 0.0885634496808052, 'timestamp': '2025-10-01 04:46:06.900403', 'step': 20698, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:06.953699', 'step': 20698, 'epoch': 3} {'type': 'loss', 'content': 0.12157304584980011, 'timestamp': '2025-10-01 04:46:06.955663', 'step': 20699, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:07.008255', 'step': 20699, 'epoch': 3} {'type': 'loss', 'content': 0.05927406996488571, 'timestamp': '2025-10-01 04:46:07.013958', 'step': 20700, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:07.066090', 'step': 20700, 'epoch': 3} {'type': 'loss', 'content': 0.09415066242218018, 'timestamp': '2025-10-01 04:46:07.068240', 'step': 20701, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:07.121105', 'step': 20701, 'epoch': 3} {'type': 'loss', 'content': 0.029036492109298706, 'timestamp': '2025-10-01 04:46:07.123341', 'step': 20702, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:07.194224', 'step': 20702, 'epoch': 3} {'type': 'loss', 'content': 0.1063496395945549, 'timestamp': '2025-10-01 04:46:07.197015', 'step': 20703, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:07.250967', 'step': 20703, 'epoch': 3} {'type': 'loss', 'content': 0.08058542758226395, 'timestamp': '2025-10-01 04:46:07.256645', 'step': 20704, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:07.309149', 'step': 20704, 'epoch': 3} {'type': 'loss', 'content': 0.14259907603263855, 'timestamp': '2025-10-01 04:46:07.311310', 'step': 20705, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:07.363998', 'step': 20705, 'epoch': 3} {'type': 'loss', 'content': 0.11014688014984131, 'timestamp': '2025-10-01 04:46:07.366079', 'step': 20706, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:07.419680', 'step': 20706, 'epoch': 3} {'type': 'loss', 'content': 0.11949536949396133, 'timestamp': '2025-10-01 04:46:07.421785', 'step': 20707, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:07.474596', 'step': 20707, 'epoch': 3} {'type': 'loss', 'content': 0.05475510284304619, 'timestamp': '2025-10-01 04:46:07.480188', 'step': 20708, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:07.532867', 'step': 20708, 'epoch': 3} {'type': 'loss', 'content': 0.06296664476394653, 'timestamp': '2025-10-01 04:46:07.535352', 'step': 20709, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:07.588308', 'step': 20709, 'epoch': 3} {'type': 'loss', 'content': 0.10856492072343826, 'timestamp': '2025-10-01 04:46:07.590771', 'step': 20710, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:07.645457', 'step': 20710, 'epoch': 3} {'type': 'loss', 'content': 0.13255812227725983, 'timestamp': '2025-10-01 04:46:07.656597', 'step': 20711, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:07.709297', 'step': 20711, 'epoch': 3} {'type': 'loss', 'content': 0.06410279870033264, 'timestamp': '2025-10-01 04:46:07.726631', 'step': 20712, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:07.779495', 'step': 20712, 'epoch': 3} {'type': 'loss', 'content': 0.08042748272418976, 'timestamp': '2025-10-01 04:46:07.791357', 'step': 20713, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:07.848651', 'step': 20713, 'epoch': 3} {'type': 'loss', 'content': 0.08921346068382263, 'timestamp': '2025-10-01 04:46:07.851141', 'step': 20714, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:07.905176', 'step': 20714, 'epoch': 3} {'type': 'loss', 'content': 0.06210712715983391, 'timestamp': '2025-10-01 04:46:07.907662', 'step': 20715, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:07.960497', 'step': 20715, 'epoch': 3} {'type': 'loss', 'content': 0.10305832326412201, 'timestamp': '2025-10-01 04:46:07.966073', 'step': 20716, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:08.019739', 'step': 20716, 'epoch': 3} {'type': 'loss', 'content': 0.0994611606001854, 'timestamp': '2025-10-01 04:46:08.021763', 'step': 20717, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:08.074683', 'step': 20717, 'epoch': 3} {'type': 'loss', 'content': 0.020108666270971298, 'timestamp': '2025-10-01 04:46:08.076879', 'step': 20718, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:08.130072', 'step': 20718, 'epoch': 3} {'type': 'loss', 'content': 0.06123941391706467, 'timestamp': '2025-10-01 04:46:08.132150', 'step': 20719, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:08.186798', 'step': 20719, 'epoch': 3} {'type': 'loss', 'content': 0.06264454871416092, 'timestamp': '2025-10-01 04:46:08.192451', 'step': 20720, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:08.244899', 'step': 20720, 'epoch': 3} {'type': 'loss', 'content': 0.16770516335964203, 'timestamp': '2025-10-01 04:46:08.247037', 'step': 20721, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:08.300061', 'step': 20721, 'epoch': 3} {'type': 'loss', 'content': 0.14624711871147156, 'timestamp': '2025-10-01 04:46:08.303379', 'step': 20722, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:08.356017', 'step': 20722, 'epoch': 3} {'type': 'loss', 'content': 0.01610567234456539, 'timestamp': '2025-10-01 04:46:08.357922', 'step': 20723, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:08.416952', 'step': 20723, 'epoch': 3} {'type': 'loss', 'content': 0.04837734252214432, 'timestamp': '2025-10-01 04:46:08.422575', 'step': 20724, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:08.474866', 'step': 20724, 'epoch': 3} {'type': 'loss', 'content': 0.03412022814154625, 'timestamp': '2025-10-01 04:46:08.484786', 'step': 20725, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:08.537910', 'step': 20725, 'epoch': 3} {'type': 'loss', 'content': 0.1540318876504898, 'timestamp': '2025-10-01 04:46:08.540037', 'step': 20726, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:08.600420', 'step': 20726, 'epoch': 3} {'type': 'loss', 'content': 0.07754232734441757, 'timestamp': '2025-10-01 04:46:08.602659', 'step': 20727, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:08.656852', 'step': 20727, 'epoch': 3} {'type': 'loss', 'content': 0.09498180449008942, 'timestamp': '2025-10-01 04:46:08.662540', 'step': 20728, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:08.723506', 'step': 20728, 'epoch': 3} {'type': 'loss', 'content': 0.07053006440401077, 'timestamp': '2025-10-01 04:46:08.725758', 'step': 20729, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:08.779366', 'step': 20729, 'epoch': 3} {'type': 'loss', 'content': 0.04852711409330368, 'timestamp': '2025-10-01 04:46:08.781664', 'step': 20730, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:08.835760', 'step': 20730, 'epoch': 3} {'type': 'loss', 'content': 0.023649683222174644, 'timestamp': '2025-10-01 04:46:08.838012', 'step': 20731, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:46:08.890728', 'step': 20731, 'epoch': 3} {'type': 'loss', 'content': 0.049247272312641144, 'timestamp': '2025-10-01 04:46:08.896310', 'step': 20732, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:08.949673', 'step': 20732, 'epoch': 3} {'type': 'loss', 'content': 0.12133859843015671, 'timestamp': '2025-10-01 04:46:08.951717', 'step': 20733, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:09.004988', 'step': 20733, 'epoch': 3} {'type': 'loss', 'content': 0.10874532908201218, 'timestamp': '2025-10-01 04:46:09.007210', 'step': 20734, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:09.060338', 'step': 20734, 'epoch': 3} {'type': 'loss', 'content': 0.12467796355485916, 'timestamp': '2025-10-01 04:46:09.067110', 'step': 20735, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:09.120915', 'step': 20735, 'epoch': 3} {'type': 'loss', 'content': 0.09241998195648193, 'timestamp': '2025-10-01 04:46:09.126676', 'step': 20736, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:09.180214', 'step': 20736, 'epoch': 3} {'type': 'loss', 'content': 0.06546379625797272, 'timestamp': '2025-10-01 04:46:09.182425', 'step': 20737, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:09.236345', 'step': 20737, 'epoch': 3} {'type': 'loss', 'content': 0.09301836043596268, 'timestamp': '2025-10-01 04:46:09.239383', 'step': 20738, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:09.292732', 'step': 20738, 'epoch': 3} {'type': 'loss', 'content': 0.01607378013432026, 'timestamp': '2025-10-01 04:46:09.295070', 'step': 20739, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:09.348385', 'step': 20739, 'epoch': 3} {'type': 'loss', 'content': 0.14785341918468475, 'timestamp': '2025-10-01 04:46:09.354412', 'step': 20740, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:09.421885', 'step': 20740, 'epoch': 3} {'type': 'loss', 'content': 0.07642523944377899, 'timestamp': '2025-10-01 04:46:09.424112', 'step': 20741, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:09.477207', 'step': 20741, 'epoch': 3} {'type': 'loss', 'content': 0.1387191265821457, 'timestamp': '2025-10-01 04:46:09.486238', 'step': 20742, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:09.539535', 'step': 20742, 'epoch': 3} {'type': 'loss', 'content': 0.07496107369661331, 'timestamp': '2025-10-01 04:46:09.541699', 'step': 20743, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:09.606505', 'step': 20743, 'epoch': 3} {'type': 'loss', 'content': 0.07456033676862717, 'timestamp': '2025-10-01 04:46:09.612404', 'step': 20744, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:09.665289', 'step': 20744, 'epoch': 3} {'type': 'loss', 'content': 0.09547515213489532, 'timestamp': '2025-10-01 04:46:09.667333', 'step': 20745, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:09.720259', 'step': 20745, 'epoch': 3} {'type': 'loss', 'content': 0.1589372605085373, 'timestamp': '2025-10-01 04:46:09.722495', 'step': 20746, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:09.785277', 'step': 20746, 'epoch': 3} {'type': 'loss', 'content': 0.06657084077596664, 'timestamp': '2025-10-01 04:46:09.789837', 'step': 20747, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:09.850933', 'step': 20747, 'epoch': 3} {'type': 'loss', 'content': 0.038535669445991516, 'timestamp': '2025-10-01 04:46:09.856671', 'step': 20748, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:09.910757', 'step': 20748, 'epoch': 3} {'type': 'loss', 'content': 0.06009260192513466, 'timestamp': '2025-10-01 04:46:09.912918', 'step': 20749, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:09.965846', 'step': 20749, 'epoch': 3} {'type': 'loss', 'content': 0.09952200204133987, 'timestamp': '2025-10-01 04:46:09.968088', 'step': 20750, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:10.021777', 'step': 20750, 'epoch': 3} {'type': 'loss', 'content': 0.11994879692792892, 'timestamp': '2025-10-01 04:46:10.024569', 'step': 20751, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:10.077677', 'step': 20751, 'epoch': 3} {'type': 'loss', 'content': 0.06435924023389816, 'timestamp': '2025-10-01 04:46:10.083214', 'step': 20752, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:46:10.135999', 'step': 20752, 'epoch': 3} {'type': 'loss', 'content': 0.12631191313266754, 'timestamp': '2025-10-01 04:46:10.144886', 'step': 20753, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:10.198005', 'step': 20753, 'epoch': 3} {'type': 'loss', 'content': 0.07611692696809769, 'timestamp': '2025-10-01 04:46:10.206485', 'step': 20754, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:10.259589', 'step': 20754, 'epoch': 3} {'type': 'loss', 'content': 0.033563073724508286, 'timestamp': '2025-10-01 04:46:10.267224', 'step': 20755, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:10.320794', 'step': 20755, 'epoch': 3} {'type': 'loss', 'content': 0.10032211989164352, 'timestamp': '2025-10-01 04:46:10.326408', 'step': 20756, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:10.378717', 'step': 20756, 'epoch': 3} {'type': 'loss', 'content': 0.06414641439914703, 'timestamp': '2025-10-01 04:46:10.380949', 'step': 20757, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:10.434336', 'step': 20757, 'epoch': 3} {'type': 'loss', 'content': 0.05647079274058342, 'timestamp': '2025-10-01 04:46:10.436659', 'step': 20758, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:10.489973', 'step': 20758, 'epoch': 3} {'type': 'loss', 'content': 0.050297483801841736, 'timestamp': '2025-10-01 04:46:10.492191', 'step': 20759, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:46:10.545289', 'step': 20759, 'epoch': 3} {'type': 'loss', 'content': 0.09577374905347824, 'timestamp': '2025-10-01 04:46:10.557859', 'step': 20760, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:10.610649', 'step': 20760, 'epoch': 3} {'type': 'loss', 'content': 0.0475674644112587, 'timestamp': '2025-10-01 04:46:10.612989', 'step': 20761, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:10.667017', 'step': 20761, 'epoch': 3} {'type': 'loss', 'content': 0.14061297476291656, 'timestamp': '2025-10-01 04:46:10.669234', 'step': 20762, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:10.735080', 'step': 20762, 'epoch': 3} {'type': 'loss', 'content': 0.03919468820095062, 'timestamp': '2025-10-01 04:46:10.737498', 'step': 20763, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:10.792779', 'step': 20763, 'epoch': 3} {'type': 'loss', 'content': 0.08918878436088562, 'timestamp': '2025-10-01 04:46:10.798707', 'step': 20764, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:10.851772', 'step': 20764, 'epoch': 3} {'type': 'loss', 'content': 0.13428740203380585, 'timestamp': '2025-10-01 04:46:10.854409', 'step': 20765, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:46:10.909040', 'step': 20765, 'epoch': 3} {'type': 'loss', 'content': 0.08697529137134552, 'timestamp': '2025-10-01 04:46:10.919703', 'step': 20766, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:46:10.983488', 'step': 20766, 'epoch': 3} {'type': 'loss', 'content': 0.11734955757856369, 'timestamp': '2025-10-01 04:46:10.991059', 'step': 20767, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:11.045927', 'step': 20767, 'epoch': 3} {'type': 'loss', 'content': 0.05834653973579407, 'timestamp': '2025-10-01 04:46:11.051999', 'step': 20768, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:11.107737', 'step': 20768, 'epoch': 3} {'type': 'loss', 'content': 0.06856437027454376, 'timestamp': '2025-10-01 04:46:11.118629', 'step': 20769, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:11.174125', 'step': 20769, 'epoch': 3} {'type': 'loss', 'content': 0.16225560009479523, 'timestamp': '2025-10-01 04:46:11.176543', 'step': 20770, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:11.230293', 'step': 20770, 'epoch': 3} {'type': 'loss', 'content': 0.08586818724870682, 'timestamp': '2025-10-01 04:46:11.232595', 'step': 20771, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:11.286834', 'step': 20771, 'epoch': 3} {'type': 'loss', 'content': 0.09087486565113068, 'timestamp': '2025-10-01 04:46:11.293221', 'step': 20772, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:11.347573', 'step': 20772, 'epoch': 3} {'type': 'loss', 'content': 0.14459381997585297, 'timestamp': '2025-10-01 04:46:11.349974', 'step': 20773, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:11.404374', 'step': 20773, 'epoch': 3} {'type': 'loss', 'content': 0.056669268757104874, 'timestamp': '2025-10-01 04:46:11.412104', 'step': 20774, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:11.468146', 'step': 20774, 'epoch': 3} {'type': 'loss', 'content': 0.122340627014637, 'timestamp': '2025-10-01 04:46:11.470575', 'step': 20775, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:11.525017', 'step': 20775, 'epoch': 3} {'type': 'loss', 'content': 0.031365931034088135, 'timestamp': '2025-10-01 04:46:11.531821', 'step': 20776, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:11.585934', 'step': 20776, 'epoch': 3} {'type': 'loss', 'content': 0.08808421343564987, 'timestamp': '2025-10-01 04:46:11.588234', 'step': 20777, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:11.642456', 'step': 20777, 'epoch': 3} {'type': 'loss', 'content': 0.12126383930444717, 'timestamp': '2025-10-01 04:46:11.645802', 'step': 20778, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:11.700473', 'step': 20778, 'epoch': 3} {'type': 'loss', 'content': 0.1254323422908783, 'timestamp': '2025-10-01 04:46:11.703243', 'step': 20779, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:11.768777', 'step': 20779, 'epoch': 3} {'type': 'loss', 'content': 0.18183743953704834, 'timestamp': '2025-10-01 04:46:11.774397', 'step': 20780, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:11.835862', 'step': 20780, 'epoch': 3} {'type': 'loss', 'content': 0.08442197740077972, 'timestamp': '2025-10-01 04:46:11.838576', 'step': 20781, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:11.894463', 'step': 20781, 'epoch': 3} {'type': 'loss', 'content': 0.08697764575481415, 'timestamp': '2025-10-01 04:46:11.897101', 'step': 20782, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:11.951591', 'step': 20782, 'epoch': 3} {'type': 'loss', 'content': 0.07488782703876495, 'timestamp': '2025-10-01 04:46:11.953918', 'step': 20783, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:12.007161', 'step': 20783, 'epoch': 3} {'type': 'loss', 'content': 0.048481833189725876, 'timestamp': '2025-10-01 04:46:12.014234', 'step': 20784, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:12.069272', 'step': 20784, 'epoch': 3} {'type': 'loss', 'content': 0.07279707491397858, 'timestamp': '2025-10-01 04:46:12.071599', 'step': 20785, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:12.125737', 'step': 20785, 'epoch': 3} {'type': 'loss', 'content': 0.05047621950507164, 'timestamp': '2025-10-01 04:46:12.128133', 'step': 20786, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:12.182006', 'step': 20786, 'epoch': 3} {'type': 'loss', 'content': 0.06735485047101974, 'timestamp': '2025-10-01 04:46:12.184395', 'step': 20787, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:12.238912', 'step': 20787, 'epoch': 3} {'type': 'loss', 'content': 0.10975417494773865, 'timestamp': '2025-10-01 04:46:12.244549', 'step': 20788, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:12.296980', 'step': 20788, 'epoch': 3} {'type': 'loss', 'content': 0.08162335306406021, 'timestamp': '2025-10-01 04:46:12.299250', 'step': 20789, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:12.353210', 'step': 20789, 'epoch': 3} {'type': 'loss', 'content': 0.19865639507770538, 'timestamp': '2025-10-01 04:46:12.355572', 'step': 20790, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:12.409258', 'step': 20790, 'epoch': 3} {'type': 'loss', 'content': 0.05342946574091911, 'timestamp': '2025-10-01 04:46:12.421947', 'step': 20791, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:12.475160', 'step': 20791, 'epoch': 3} {'type': 'loss', 'content': 0.06785371154546738, 'timestamp': '2025-10-01 04:46:12.480743', 'step': 20792, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:12.534114', 'step': 20792, 'epoch': 3} {'type': 'loss', 'content': 0.10070770233869553, 'timestamp': '2025-10-01 04:46:12.536261', 'step': 20793, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:12.589472', 'step': 20793, 'epoch': 3} {'type': 'loss', 'content': 0.015153365209698677, 'timestamp': '2025-10-01 04:46:12.591608', 'step': 20794, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:12.644817', 'step': 20794, 'epoch': 3} {'type': 'loss', 'content': 0.09023715555667877, 'timestamp': '2025-10-01 04:46:12.646956', 'step': 20795, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:12.700325', 'step': 20795, 'epoch': 3} {'type': 'loss', 'content': 0.06660865992307663, 'timestamp': '2025-10-01 04:46:12.707042', 'step': 20796, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:12.759807', 'step': 20796, 'epoch': 3} {'type': 'loss', 'content': 0.06686072796583176, 'timestamp': '2025-10-01 04:46:12.763171', 'step': 20797, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:12.818668', 'step': 20797, 'epoch': 3} {'type': 'loss', 'content': 0.051092684268951416, 'timestamp': '2025-10-01 04:46:12.820813', 'step': 20798, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:12.873987', 'step': 20798, 'epoch': 3} {'type': 'loss', 'content': 0.10823080688714981, 'timestamp': '2025-10-01 04:46:12.875772', 'step': 20799, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:12.928981', 'step': 20799, 'epoch': 3} {'type': 'loss', 'content': 0.11931026726961136, 'timestamp': '2025-10-01 04:46:12.934749', 'step': 20800, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:12.987011', 'step': 20800, 'epoch': 3} {'type': 'loss', 'content': 0.17460916936397552, 'timestamp': '2025-10-01 04:46:12.994926', 'step': 20801, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:13.049916', 'step': 20801, 'epoch': 3} {'type': 'loss', 'content': 0.10136813670396805, 'timestamp': '2025-10-01 04:46:13.052047', 'step': 20802, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:46:13.105467', 'step': 20802, 'epoch': 3} {'type': 'loss', 'content': 0.11924591660499573, 'timestamp': '2025-10-01 04:46:13.107591', 'step': 20803, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:13.161688', 'step': 20803, 'epoch': 3} {'type': 'loss', 'content': 0.07088437676429749, 'timestamp': '2025-10-01 04:46:13.167470', 'step': 20804, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:13.219489', 'step': 20804, 'epoch': 3} {'type': 'loss', 'content': 0.043526649475097656, 'timestamp': '2025-10-01 04:46:13.221644', 'step': 20805, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:13.274471', 'step': 20805, 'epoch': 3} {'type': 'loss', 'content': 0.12561801075935364, 'timestamp': '2025-10-01 04:46:13.277782', 'step': 20806, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:13.341833', 'step': 20806, 'epoch': 3} {'type': 'loss', 'content': 0.049627017229795456, 'timestamp': '2025-10-01 04:46:13.344428', 'step': 20807, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:13.401496', 'step': 20807, 'epoch': 3} {'type': 'loss', 'content': 0.13043707609176636, 'timestamp': '2025-10-01 04:46:13.407104', 'step': 20808, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:13.459669', 'step': 20808, 'epoch': 3} {'type': 'loss', 'content': 0.1873313933610916, 'timestamp': '2025-10-01 04:46:13.461764', 'step': 20809, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:13.515647', 'step': 20809, 'epoch': 3} {'type': 'loss', 'content': 0.0705067589879036, 'timestamp': '2025-10-01 04:46:13.517914', 'step': 20810, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:13.571321', 'step': 20810, 'epoch': 3} {'type': 'loss', 'content': 0.14043176174163818, 'timestamp': '2025-10-01 04:46:13.573314', 'step': 20811, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:13.626318', 'step': 20811, 'epoch': 3} {'type': 'loss', 'content': 0.061312589794397354, 'timestamp': '2025-10-01 04:46:13.632124', 'step': 20812, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:13.684317', 'step': 20812, 'epoch': 3} {'type': 'loss', 'content': 0.086215540766716, 'timestamp': '2025-10-01 04:46:13.686456', 'step': 20813, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:13.739889', 'step': 20813, 'epoch': 3} {'type': 'loss', 'content': 0.031088707968592644, 'timestamp': '2025-10-01 04:46:13.742216', 'step': 20814, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:13.803969', 'step': 20814, 'epoch': 3} {'type': 'loss', 'content': 0.1093992069363594, 'timestamp': '2025-10-01 04:46:13.806213', 'step': 20815, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:46:13.859524', 'step': 20815, 'epoch': 3} {'type': 'loss', 'content': 0.09841602295637131, 'timestamp': '2025-10-01 04:46:13.865506', 'step': 20816, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:13.917658', 'step': 20816, 'epoch': 3} {'type': 'loss', 'content': 0.11063999682664871, 'timestamp': '2025-10-01 04:46:13.920972', 'step': 20817, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:13.977262', 'step': 20817, 'epoch': 3} {'type': 'loss', 'content': 0.0855151116847992, 'timestamp': '2025-10-01 04:46:13.979440', 'step': 20818, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:46:14.033226', 'step': 20818, 'epoch': 3} {'type': 'loss', 'content': 0.02694689854979515, 'timestamp': '2025-10-01 04:46:14.035598', 'step': 20819, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:14.089433', 'step': 20819, 'epoch': 3} {'type': 'loss', 'content': 0.12660124897956848, 'timestamp': '2025-10-01 04:46:14.095468', 'step': 20820, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:14.147214', 'step': 20820, 'epoch': 3} {'type': 'loss', 'content': 0.08326370269060135, 'timestamp': '2025-10-01 04:46:14.149262', 'step': 20821, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:14.202585', 'step': 20821, 'epoch': 3} {'type': 'loss', 'content': 0.030445806682109833, 'timestamp': '2025-10-01 04:46:14.205191', 'step': 20822, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:14.257854', 'step': 20822, 'epoch': 3} {'type': 'loss', 'content': 0.057549938559532166, 'timestamp': '2025-10-01 04:46:14.259980', 'step': 20823, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:46:14.313575', 'step': 20823, 'epoch': 3} {'type': 'loss', 'content': 0.15353326499462128, 'timestamp': '2025-10-01 04:46:14.319366', 'step': 20824, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:14.371886', 'step': 20824, 'epoch': 3} {'type': 'loss', 'content': 0.10258965194225311, 'timestamp': '2025-10-01 04:46:14.373955', 'step': 20825, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:14.437249', 'step': 20825, 'epoch': 3} {'type': 'loss', 'content': 0.09481978416442871, 'timestamp': '2025-10-01 04:46:14.439320', 'step': 20826, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:14.492003', 'step': 20826, 'epoch': 3} {'type': 'loss', 'content': 0.08176503330469131, 'timestamp': '2025-10-01 04:46:14.495826', 'step': 20827, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:14.551497', 'step': 20827, 'epoch': 3} {'type': 'loss', 'content': 0.13355787098407745, 'timestamp': '2025-10-01 04:46:14.557501', 'step': 20828, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:14.610456', 'step': 20828, 'epoch': 3} {'type': 'loss', 'content': 0.11211900413036346, 'timestamp': '2025-10-01 04:46:14.612676', 'step': 20829, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:14.665824', 'step': 20829, 'epoch': 3} {'type': 'loss', 'content': 0.10766790807247162, 'timestamp': '2025-10-01 04:46:14.668569', 'step': 20830, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:14.721689', 'step': 20830, 'epoch': 3} {'type': 'loss', 'content': 0.0739494115114212, 'timestamp': '2025-10-01 04:46:14.723944', 'step': 20831, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:14.776620', 'step': 20831, 'epoch': 3} {'type': 'loss', 'content': 0.09435117244720459, 'timestamp': '2025-10-01 04:46:14.782350', 'step': 20832, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:14.836404', 'step': 20832, 'epoch': 3} {'type': 'loss', 'content': 0.09483953565359116, 'timestamp': '2025-10-01 04:46:14.847412', 'step': 20833, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:14.900859', 'step': 20833, 'epoch': 3} {'type': 'loss', 'content': 0.20499643683433533, 'timestamp': '2025-10-01 04:46:14.903259', 'step': 20834, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:14.957077', 'step': 20834, 'epoch': 3} {'type': 'loss', 'content': 0.10267656296491623, 'timestamp': '2025-10-01 04:46:14.961662', 'step': 20835, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:15.015749', 'step': 20835, 'epoch': 3} {'type': 'loss', 'content': 0.0823703408241272, 'timestamp': '2025-10-01 04:46:15.021658', 'step': 20836, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:15.078993', 'step': 20836, 'epoch': 3} {'type': 'loss', 'content': 0.08570756763219833, 'timestamp': '2025-10-01 04:46:15.088822', 'step': 20837, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:15.142052', 'step': 20837, 'epoch': 3} {'type': 'loss', 'content': 0.03698981925845146, 'timestamp': '2025-10-01 04:46:15.149608', 'step': 20838, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:15.207372', 'step': 20838, 'epoch': 3} {'type': 'loss', 'content': 0.09146306663751602, 'timestamp': '2025-10-01 04:46:15.209405', 'step': 20839, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:15.263018', 'step': 20839, 'epoch': 3} {'type': 'loss', 'content': 0.11441861838102341, 'timestamp': '2025-10-01 04:46:15.268558', 'step': 20840, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:15.320460', 'step': 20840, 'epoch': 3} {'type': 'loss', 'content': 0.08545846492052078, 'timestamp': '2025-10-01 04:46:15.323046', 'step': 20841, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:46:15.375866', 'step': 20841, 'epoch': 3} {'type': 'loss', 'content': 0.12942668795585632, 'timestamp': '2025-10-01 04:46:15.377790', 'step': 20842, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:15.435504', 'step': 20842, 'epoch': 3} {'type': 'loss', 'content': 0.1469649076461792, 'timestamp': '2025-10-01 04:46:15.437782', 'step': 20843, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:15.491105', 'step': 20843, 'epoch': 3} {'type': 'loss', 'content': 0.08216113597154617, 'timestamp': '2025-10-01 04:46:15.496800', 'step': 20844, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:15.549439', 'step': 20844, 'epoch': 3} {'type': 'loss', 'content': 0.024960195645689964, 'timestamp': '2025-10-01 04:46:15.551560', 'step': 20845, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:15.604228', 'step': 20845, 'epoch': 3} {'type': 'loss', 'content': 0.08522843569517136, 'timestamp': '2025-10-01 04:46:15.606230', 'step': 20846, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:15.659124', 'step': 20846, 'epoch': 3} {'type': 'loss', 'content': 0.06032777205109596, 'timestamp': '2025-10-01 04:46:15.661168', 'step': 20847, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:15.713959', 'step': 20847, 'epoch': 3} {'type': 'loss', 'content': 0.04860914498567581, 'timestamp': '2025-10-01 04:46:15.719631', 'step': 20848, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:15.772139', 'step': 20848, 'epoch': 3} {'type': 'loss', 'content': 0.09042944014072418, 'timestamp': '2025-10-01 04:46:15.774244', 'step': 20849, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:15.826974', 'step': 20849, 'epoch': 3} {'type': 'loss', 'content': 0.03227217122912407, 'timestamp': '2025-10-01 04:46:15.829122', 'step': 20850, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:15.882350', 'step': 20850, 'epoch': 3} {'type': 'loss', 'content': 0.0706261470913887, 'timestamp': '2025-10-01 04:46:15.884317', 'step': 20851, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:15.937391', 'step': 20851, 'epoch': 3} {'type': 'loss', 'content': 0.08805646747350693, 'timestamp': '2025-10-01 04:46:15.943089', 'step': 20852, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:15.995731', 'step': 20852, 'epoch': 3} {'type': 'loss', 'content': 0.16254355013370514, 'timestamp': '2025-10-01 04:46:15.997783', 'step': 20853, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:16.051187', 'step': 20853, 'epoch': 3} {'type': 'loss', 'content': 0.08300260454416275, 'timestamp': '2025-10-01 04:46:16.053345', 'step': 20854, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:16.106549', 'step': 20854, 'epoch': 3} {'type': 'loss', 'content': 0.07143217325210571, 'timestamp': '2025-10-01 04:46:16.108670', 'step': 20855, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:16.161842', 'step': 20855, 'epoch': 3} {'type': 'loss', 'content': 0.0635700523853302, 'timestamp': '2025-10-01 04:46:16.167426', 'step': 20856, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:46:16.220043', 'step': 20856, 'epoch': 3} {'type': 'loss', 'content': 0.060355134308338165, 'timestamp': '2025-10-01 04:46:16.222355', 'step': 20857, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:16.275713', 'step': 20857, 'epoch': 3} {'type': 'loss', 'content': 0.08111550658941269, 'timestamp': '2025-10-01 04:46:16.277939', 'step': 20858, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:16.331247', 'step': 20858, 'epoch': 3} {'type': 'loss', 'content': 0.08778475224971771, 'timestamp': '2025-10-01 04:46:16.333739', 'step': 20859, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:16.389009', 'step': 20859, 'epoch': 3} {'type': 'loss', 'content': 0.15443415939807892, 'timestamp': '2025-10-01 04:46:16.394675', 'step': 20860, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:16.446898', 'step': 20860, 'epoch': 3} {'type': 'loss', 'content': 0.11490722745656967, 'timestamp': '2025-10-01 04:46:16.449162', 'step': 20861, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:16.502078', 'step': 20861, 'epoch': 3} {'type': 'loss', 'content': 0.08954377472400665, 'timestamp': '2025-10-01 04:46:16.504770', 'step': 20862, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:46:16.559961', 'step': 20862, 'epoch': 3} {'type': 'loss', 'content': 0.02873498573899269, 'timestamp': '2025-10-01 04:46:16.562038', 'step': 20863, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:16.614934', 'step': 20863, 'epoch': 3} {'type': 'loss', 'content': 0.06601908057928085, 'timestamp': '2025-10-01 04:46:16.620533', 'step': 20864, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:16.672591', 'step': 20864, 'epoch': 3} {'type': 'loss', 'content': 0.1813787817955017, 'timestamp': '2025-10-01 04:46:16.674603', 'step': 20865, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:16.727346', 'step': 20865, 'epoch': 3} {'type': 'loss', 'content': 0.06146322563290596, 'timestamp': '2025-10-01 04:46:16.729396', 'step': 20866, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:16.782504', 'step': 20866, 'epoch': 3} {'type': 'loss', 'content': 0.08568983525037766, 'timestamp': '2025-10-01 04:46:16.784641', 'step': 20867, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:16.837753', 'step': 20867, 'epoch': 3} {'type': 'loss', 'content': 0.1276460438966751, 'timestamp': '2025-10-01 04:46:16.843566', 'step': 20868, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:16.895756', 'step': 20868, 'epoch': 3} {'type': 'loss', 'content': 0.11483633518218994, 'timestamp': '2025-10-01 04:46:16.897956', 'step': 20869, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:16.951578', 'step': 20869, 'epoch': 3} {'type': 'loss', 'content': 0.1519996076822281, 'timestamp': '2025-10-01 04:46:16.953682', 'step': 20870, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:46:17.006724', 'step': 20870, 'epoch': 3} {'type': 'loss', 'content': 0.08475261926651001, 'timestamp': '2025-10-01 04:46:17.010256', 'step': 20871, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:17.063173', 'step': 20871, 'epoch': 3} {'type': 'loss', 'content': 0.12901854515075684, 'timestamp': '2025-10-01 04:46:17.069062', 'step': 20872, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:17.122897', 'step': 20872, 'epoch': 3} {'type': 'loss', 'content': 0.10692469030618668, 'timestamp': '2025-10-01 04:46:17.125076', 'step': 20873, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:17.177908', 'step': 20873, 'epoch': 3} {'type': 'loss', 'content': 0.06619018316268921, 'timestamp': '2025-10-01 04:46:17.180033', 'step': 20874, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:17.233469', 'step': 20874, 'epoch': 3} {'type': 'loss', 'content': 0.10380469262599945, 'timestamp': '2025-10-01 04:46:17.236706', 'step': 20875, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:17.290093', 'step': 20875, 'epoch': 3} {'type': 'loss', 'content': 0.05585683137178421, 'timestamp': '2025-10-01 04:46:17.295768', 'step': 20876, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:17.348986', 'step': 20876, 'epoch': 3} {'type': 'loss', 'content': 0.08677907288074493, 'timestamp': '2025-10-01 04:46:17.351027', 'step': 20877, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:17.403927', 'step': 20877, 'epoch': 3} {'type': 'loss', 'content': 0.08230242133140564, 'timestamp': '2025-10-01 04:46:17.406052', 'step': 20878, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:17.459235', 'step': 20878, 'epoch': 3} {'type': 'loss', 'content': 0.14472568035125732, 'timestamp': '2025-10-01 04:46:17.461268', 'step': 20879, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:17.514424', 'step': 20879, 'epoch': 3} {'type': 'loss', 'content': 0.03630972281098366, 'timestamp': '2025-10-01 04:46:17.520121', 'step': 20880, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:17.572851', 'step': 20880, 'epoch': 3} {'type': 'loss', 'content': 0.08900509774684906, 'timestamp': '2025-10-01 04:46:17.575025', 'step': 20881, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:17.628058', 'step': 20881, 'epoch': 3} {'type': 'loss', 'content': 0.0624532476067543, 'timestamp': '2025-10-01 04:46:17.630116', 'step': 20882, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:17.682836', 'step': 20882, 'epoch': 3} {'type': 'loss', 'content': 0.011748548597097397, 'timestamp': '2025-10-01 04:46:17.685007', 'step': 20883, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:17.738195', 'step': 20883, 'epoch': 3} {'type': 'loss', 'content': 0.018053028732538223, 'timestamp': '2025-10-01 04:46:17.743973', 'step': 20884, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:17.797238', 'step': 20884, 'epoch': 3} {'type': 'loss', 'content': 0.0939321517944336, 'timestamp': '2025-10-01 04:46:17.799353', 'step': 20885, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:17.852736', 'step': 20885, 'epoch': 3} {'type': 'loss', 'content': 0.06757482886314392, 'timestamp': '2025-10-01 04:46:17.855078', 'step': 20886, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:17.908228', 'step': 20886, 'epoch': 3} {'type': 'loss', 'content': 0.06051120162010193, 'timestamp': '2025-10-01 04:46:17.910474', 'step': 20887, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:46:17.964291', 'step': 20887, 'epoch': 3} {'type': 'loss', 'content': 0.0862109586596489, 'timestamp': '2025-10-01 04:46:17.970093', 'step': 20888, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:18.022666', 'step': 20888, 'epoch': 3} {'type': 'loss', 'content': 0.060193341225385666, 'timestamp': '2025-10-01 04:46:18.024755', 'step': 20889, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:18.078011', 'step': 20889, 'epoch': 3} {'type': 'loss', 'content': 0.08604435622692108, 'timestamp': '2025-10-01 04:46:18.080244', 'step': 20890, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:46:18.133145', 'step': 20890, 'epoch': 3} {'type': 'loss', 'content': 0.02754954807460308, 'timestamp': '2025-10-01 04:46:18.135378', 'step': 20891, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-01 04:46:18.189088', 'step': 20891, 'epoch': 3} {'type': 'loss', 'content': 0.06197969242930412, 'timestamp': '2025-10-01 04:46:18.194747', 'step': 20892, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:18.249989', 'step': 20892, 'epoch': 3} {'type': 'loss', 'content': 0.1030297800898552, 'timestamp': '2025-10-01 04:46:18.252081', 'step': 20893, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:18.305016', 'step': 20893, 'epoch': 3} {'type': 'loss', 'content': 0.08936887979507446, 'timestamp': '2025-10-01 04:46:18.307443', 'step': 20894, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:18.361551', 'step': 20894, 'epoch': 3} {'type': 'loss', 'content': 0.11163657903671265, 'timestamp': '2025-10-01 04:46:18.363770', 'step': 20895, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:18.419474', 'step': 20895, 'epoch': 3} {'type': 'loss', 'content': 0.10318756103515625, 'timestamp': '2025-10-01 04:46:18.425080', 'step': 20896, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:18.477541', 'step': 20896, 'epoch': 3} {'type': 'loss', 'content': 0.10028161853551865, 'timestamp': '2025-10-01 04:46:18.479434', 'step': 20897, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:18.539234', 'step': 20897, 'epoch': 3} {'type': 'loss', 'content': 0.09652739763259888, 'timestamp': '2025-10-01 04:46:18.541540', 'step': 20898, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:46:18.595908', 'step': 20898, 'epoch': 3} {'type': 'loss', 'content': 0.11846975237131119, 'timestamp': '2025-10-01 04:46:18.597726', 'step': 20899, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:46:18.654289', 'step': 20899, 'epoch': 3} {'type': 'loss', 'content': 0.08796647936105728, 'timestamp': '2025-10-01 04:46:18.659961', 'step': 20900, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:18.714956', 'step': 20900, 'epoch': 3} {'type': 'loss', 'content': 0.10937871038913727, 'timestamp': '2025-10-01 04:46:18.716949', 'step': 20901, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:18.770577', 'step': 20901, 'epoch': 3} {'type': 'loss', 'content': 0.0968240275979042, 'timestamp': '2025-10-01 04:46:18.772535', 'step': 20902, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:18.827720', 'step': 20902, 'epoch': 3} {'type': 'loss', 'content': 0.11324746906757355, 'timestamp': '2025-10-01 04:46:18.829555', 'step': 20903, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:18.884556', 'step': 20903, 'epoch': 3} {'type': 'loss', 'content': 0.11767173558473587, 'timestamp': '2025-10-01 04:46:18.890318', 'step': 20904, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:18.942699', 'step': 20904, 'epoch': 3} {'type': 'loss', 'content': 0.08367360383272171, 'timestamp': '2025-10-01 04:46:18.944951', 'step': 20905, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:18.997629', 'step': 20905, 'epoch': 3} {'type': 'loss', 'content': 0.06671788543462753, 'timestamp': '2025-10-01 04:46:18.999771', 'step': 20906, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:19.053023', 'step': 20906, 'epoch': 3} {'type': 'loss', 'content': 0.04248162731528282, 'timestamp': '2025-10-01 04:46:19.055152', 'step': 20907, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:19.108143', 'step': 20907, 'epoch': 3} {'type': 'loss', 'content': 0.08756840229034424, 'timestamp': '2025-10-01 04:46:19.113794', 'step': 20908, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:19.166796', 'step': 20908, 'epoch': 3} {'type': 'loss', 'content': 0.04249758645892143, 'timestamp': '2025-10-01 04:46:19.169011', 'step': 20909, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:19.224415', 'step': 20909, 'epoch': 3} {'type': 'loss', 'content': 0.11909211426973343, 'timestamp': '2025-10-01 04:46:19.227030', 'step': 20910, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:19.280418', 'step': 20910, 'epoch': 3} {'type': 'loss', 'content': 0.08453170210123062, 'timestamp': '2025-10-01 04:46:19.282574', 'step': 20911, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:19.335372', 'step': 20911, 'epoch': 3} {'type': 'loss', 'content': 0.031767118722200394, 'timestamp': '2025-10-01 04:46:19.340998', 'step': 20912, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:19.394093', 'step': 20912, 'epoch': 3} {'type': 'loss', 'content': 0.07272160798311234, 'timestamp': '2025-10-01 04:46:19.396401', 'step': 20913, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:19.449416', 'step': 20913, 'epoch': 3} {'type': 'loss', 'content': 0.09508199244737625, 'timestamp': '2025-10-01 04:46:19.451669', 'step': 20914, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:19.504763', 'step': 20914, 'epoch': 3} {'type': 'loss', 'content': 0.18086399137973785, 'timestamp': '2025-10-01 04:46:19.507082', 'step': 20915, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:19.560001', 'step': 20915, 'epoch': 3} {'type': 'loss', 'content': 0.0855056494474411, 'timestamp': '2025-10-01 04:46:19.565870', 'step': 20916, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:19.618416', 'step': 20916, 'epoch': 3} {'type': 'loss', 'content': 0.10022483021020889, 'timestamp': '2025-10-01 04:46:19.620454', 'step': 20917, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:19.673224', 'step': 20917, 'epoch': 3} {'type': 'loss', 'content': 0.13926070928573608, 'timestamp': '2025-10-01 04:46:19.675430', 'step': 20918, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:19.728629', 'step': 20918, 'epoch': 3} {'type': 'loss', 'content': 0.14524193108081818, 'timestamp': '2025-10-01 04:46:19.730777', 'step': 20919, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:19.785535', 'step': 20919, 'epoch': 3} {'type': 'loss', 'content': 0.1674826592206955, 'timestamp': '2025-10-01 04:46:19.791092', 'step': 20920, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:19.844307', 'step': 20920, 'epoch': 3} {'type': 'loss', 'content': 0.10738220065832138, 'timestamp': '2025-10-01 04:46:19.846476', 'step': 20921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:19.899275', 'step': 20921, 'epoch': 3} {'type': 'loss', 'content': 0.06274447590112686, 'timestamp': '2025-10-01 04:46:19.901330', 'step': 20922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:19.954629', 'step': 20922, 'epoch': 3} {'type': 'loss', 'content': 0.09295928478240967, 'timestamp': '2025-10-01 04:46:19.956792', 'step': 20923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:20.009644', 'step': 20923, 'epoch': 3} {'type': 'loss', 'content': 0.09278260916471481, 'timestamp': '2025-10-01 04:46:20.015394', 'step': 20924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:20.068771', 'step': 20924, 'epoch': 3} {'type': 'loss', 'content': 0.09828122705221176, 'timestamp': '2025-10-01 04:46:20.070915', 'step': 20925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:20.125597', 'step': 20925, 'epoch': 3} {'type': 'loss', 'content': 0.036639366298913956, 'timestamp': '2025-10-01 04:46:20.127776', 'step': 20926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:20.196733', 'step': 20926, 'epoch': 3} {'type': 'loss', 'content': 0.12690213322639465, 'timestamp': '2025-10-01 04:46:20.198823', 'step': 20927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:20.263285', 'step': 20927, 'epoch': 3} {'type': 'loss', 'content': 0.04306456446647644, 'timestamp': '2025-10-01 04:46:20.269105', 'step': 20928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:20.322019', 'step': 20928, 'epoch': 3} {'type': 'loss', 'content': 0.08491180092096329, 'timestamp': '2025-10-01 04:46:20.324233', 'step': 20929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:20.378287', 'step': 20929, 'epoch': 3} {'type': 'loss', 'content': 0.11325781047344208, 'timestamp': '2025-10-01 04:46:20.381113', 'step': 20930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:20.434868', 'step': 20930, 'epoch': 3} {'type': 'loss', 'content': 0.11098898947238922, 'timestamp': '2025-10-01 04:46:20.437110', 'step': 20931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:20.491003', 'step': 20931, 'epoch': 3} {'type': 'loss', 'content': 0.022179637104272842, 'timestamp': '2025-10-01 04:46:20.496907', 'step': 20932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:20.549775', 'step': 20932, 'epoch': 3} {'type': 'loss', 'content': 0.12665925920009613, 'timestamp': '2025-10-01 04:46:20.551907', 'step': 20933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:20.606891', 'step': 20933, 'epoch': 3} {'type': 'loss', 'content': 0.10251668840646744, 'timestamp': '2025-10-01 04:46:20.609015', 'step': 20934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:46:20.665504', 'step': 20934, 'epoch': 3} {'type': 'loss', 'content': 0.04902876168489456, 'timestamp': '2025-10-01 04:46:20.667919', 'step': 20935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:20.721961', 'step': 20935, 'epoch': 3} {'type': 'loss', 'content': 0.055459752678871155, 'timestamp': '2025-10-01 04:46:20.733447', 'step': 20936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:20.786999', 'step': 20936, 'epoch': 3} {'type': 'loss', 'content': 0.0690421536564827, 'timestamp': '2025-10-01 04:46:20.789295', 'step': 20937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:20.844235', 'step': 20937, 'epoch': 3} {'type': 'loss', 'content': 0.06148337200284004, 'timestamp': '2025-10-01 04:46:20.847678', 'step': 20938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:20.903902', 'step': 20938, 'epoch': 3} {'type': 'loss', 'content': 0.06323733925819397, 'timestamp': '2025-10-01 04:46:20.906008', 'step': 20939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:20.960501', 'step': 20939, 'epoch': 3} {'type': 'loss', 'content': 0.13925567269325256, 'timestamp': '2025-10-01 04:46:20.966581', 'step': 20940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:21.019880', 'step': 20940, 'epoch': 3} {'type': 'loss', 'content': 0.06777960062026978, 'timestamp': '2025-10-01 04:46:21.022028', 'step': 20941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:21.076141', 'step': 20941, 'epoch': 3} {'type': 'loss', 'content': 0.08233751356601715, 'timestamp': '2025-10-01 04:46:21.078323', 'step': 20942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:21.132067', 'step': 20942, 'epoch': 3} {'type': 'loss', 'content': 0.04016717150807381, 'timestamp': '2025-10-01 04:46:21.134392', 'step': 20943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:21.198189', 'step': 20943, 'epoch': 3} {'type': 'loss', 'content': 0.11407437920570374, 'timestamp': '2025-10-01 04:46:21.204437', 'step': 20944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:21.257812', 'step': 20944, 'epoch': 3} {'type': 'loss', 'content': 0.06264371424913406, 'timestamp': '2025-10-01 04:46:21.260395', 'step': 20945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:21.313672', 'step': 20945, 'epoch': 3} {'type': 'loss', 'content': 0.07901322096586227, 'timestamp': '2025-10-01 04:46:21.316307', 'step': 20946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:21.369702', 'step': 20946, 'epoch': 3} {'type': 'loss', 'content': 0.024296248331665993, 'timestamp': '2025-10-01 04:46:21.371857', 'step': 20947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:21.424843', 'step': 20947, 'epoch': 3} {'type': 'loss', 'content': 0.07057006657123566, 'timestamp': '2025-10-01 04:46:21.430802', 'step': 20948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:21.484163', 'step': 20948, 'epoch': 3} {'type': 'loss', 'content': 0.12450043112039566, 'timestamp': '2025-10-01 04:46:21.486154', 'step': 20949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:21.539568', 'step': 20949, 'epoch': 3} {'type': 'loss', 'content': 0.04795924946665764, 'timestamp': '2025-10-01 04:46:21.541721', 'step': 20950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:21.595838', 'step': 20950, 'epoch': 3} {'type': 'loss', 'content': 0.08446341753005981, 'timestamp': '2025-10-01 04:46:21.597806', 'step': 20951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:21.660873', 'step': 20951, 'epoch': 3} {'type': 'loss', 'content': 0.08089981973171234, 'timestamp': '2025-10-01 04:46:21.666951', 'step': 20952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:21.719827', 'step': 20952, 'epoch': 3} {'type': 'loss', 'content': 0.0822199359536171, 'timestamp': '2025-10-01 04:46:21.721975', 'step': 20953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:21.775271', 'step': 20953, 'epoch': 3} {'type': 'loss', 'content': 0.09437762945890427, 'timestamp': '2025-10-01 04:46:21.779157', 'step': 20954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:21.836127', 'step': 20954, 'epoch': 3} {'type': 'loss', 'content': 0.1437249481678009, 'timestamp': '2025-10-01 04:46:21.838270', 'step': 20955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:21.891824', 'step': 20955, 'epoch': 3} {'type': 'loss', 'content': 0.15357676148414612, 'timestamp': '2025-10-01 04:46:21.897687', 'step': 20956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:21.951417', 'step': 20956, 'epoch': 3} {'type': 'loss', 'content': 0.051040779799222946, 'timestamp': '2025-10-01 04:46:21.953614', 'step': 20957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:22.007472', 'step': 20957, 'epoch': 3} {'type': 'loss', 'content': 0.09385641664266586, 'timestamp': '2025-10-01 04:46:22.009874', 'step': 20958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:22.064025', 'step': 20958, 'epoch': 3} {'type': 'loss', 'content': 0.1160668209195137, 'timestamp': '2025-10-01 04:46:22.066297', 'step': 20959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:22.134288', 'step': 20959, 'epoch': 3} {'type': 'loss', 'content': 0.09586813300848007, 'timestamp': '2025-10-01 04:46:22.140295', 'step': 20960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:22.192799', 'step': 20960, 'epoch': 3} {'type': 'loss', 'content': 0.1388646811246872, 'timestamp': '2025-10-01 04:46:22.195228', 'step': 20961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:22.248997', 'step': 20961, 'epoch': 3} {'type': 'loss', 'content': 0.050580233335494995, 'timestamp': '2025-10-01 04:46:22.251113', 'step': 20962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:22.304725', 'step': 20962, 'epoch': 3} {'type': 'loss', 'content': 0.0679725632071495, 'timestamp': '2025-10-01 04:46:22.306873', 'step': 20963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:22.360374', 'step': 20963, 'epoch': 3} {'type': 'loss', 'content': 0.06095803529024124, 'timestamp': '2025-10-01 04:46:22.366167', 'step': 20964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:22.418934', 'step': 20964, 'epoch': 3} {'type': 'loss', 'content': 0.09003563970327377, 'timestamp': '2025-10-01 04:46:22.421152', 'step': 20965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:22.474561', 'step': 20965, 'epoch': 3} {'type': 'loss', 'content': 0.0859869047999382, 'timestamp': '2025-10-01 04:46:22.476728', 'step': 20966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:22.530846', 'step': 20966, 'epoch': 3} {'type': 'loss', 'content': 0.07556527107954025, 'timestamp': '2025-10-01 04:46:22.533108', 'step': 20967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:22.595790', 'step': 20967, 'epoch': 3} {'type': 'loss', 'content': 0.06505955010652542, 'timestamp': '2025-10-01 04:46:22.601651', 'step': 20968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:22.654415', 'step': 20968, 'epoch': 3} {'type': 'loss', 'content': 0.1408146470785141, 'timestamp': '2025-10-01 04:46:22.656496', 'step': 20969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:22.709406', 'step': 20969, 'epoch': 3} {'type': 'loss', 'content': 0.07380006462335587, 'timestamp': '2025-10-01 04:46:22.711665', 'step': 20970, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-01 04:46:35.806567', 'step': 20970, 'epoch': 3} {'type': 'pplx', 'content': 9686.062148929343, 'timestamp': '2025-10-01 04:46:35.814750', 'step': 20970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:46:35.869882', 'step': 20970, 'epoch': 3} {'type': 'loss', 'content': 0.1436759978532791, 'timestamp': '2025-10-01 04:46:35.872255', 'step': 20971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:35.926503', 'step': 20971, 'epoch': 3} {'type': 'loss', 'content': 0.05732336640357971, 'timestamp': '2025-10-01 04:46:35.932634', 'step': 20972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:46:35.985933', 'step': 20972, 'epoch': 3} {'type': 'loss', 'content': 0.034633178263902664, 'timestamp': '2025-10-01 04:46:35.995631', 'step': 20973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:36.049437', 'step': 20973, 'epoch': 3} {'type': 'loss', 'content': 0.03132375329732895, 'timestamp': '2025-10-01 04:46:36.051770', 'step': 20974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:36.106750', 'step': 20974, 'epoch': 3} {'type': 'loss', 'content': 0.12414641678333282, 'timestamp': '2025-10-01 04:46:36.109372', 'step': 20975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:36.164340', 'step': 20975, 'epoch': 3} {'type': 'loss', 'content': 0.0715707466006279, 'timestamp': '2025-10-01 04:46:36.170384', 'step': 20976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:36.226087', 'step': 20976, 'epoch': 3} {'type': 'loss', 'content': 0.06300263106822968, 'timestamp': '2025-10-01 04:46:36.228551', 'step': 20977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:36.294580', 'step': 20977, 'epoch': 3} {'type': 'loss', 'content': 0.08828432112932205, 'timestamp': '2025-10-01 04:46:36.297406', 'step': 20978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:36.351868', 'step': 20978, 'epoch': 3} {'type': 'loss', 'content': 0.2062968909740448, 'timestamp': '2025-10-01 04:46:36.354774', 'step': 20979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:36.409282', 'step': 20979, 'epoch': 3} {'type': 'loss', 'content': 0.08858907967805862, 'timestamp': '2025-10-01 04:46:36.415424', 'step': 20980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:36.472551', 'step': 20980, 'epoch': 3} {'type': 'loss', 'content': 0.07817398011684418, 'timestamp': '2025-10-01 04:46:36.475182', 'step': 20981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:36.528655', 'step': 20981, 'epoch': 3} {'type': 'loss', 'content': 0.12516634166240692, 'timestamp': '2025-10-01 04:46:36.531262', 'step': 20982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:36.584939', 'step': 20982, 'epoch': 3} {'type': 'loss', 'content': 0.04474048316478729, 'timestamp': '2025-10-01 04:46:36.587007', 'step': 20983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:36.639976', 'step': 20983, 'epoch': 3} {'type': 'loss', 'content': 0.045705221593379974, 'timestamp': '2025-10-01 04:46:36.645620', 'step': 20984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:36.697845', 'step': 20984, 'epoch': 3} {'type': 'loss', 'content': 0.10118137300014496, 'timestamp': '2025-10-01 04:46:36.699923', 'step': 20985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:36.752825', 'step': 20985, 'epoch': 3} {'type': 'loss', 'content': 0.02931695431470871, 'timestamp': '2025-10-01 04:46:36.754999', 'step': 20986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:36.807894', 'step': 20986, 'epoch': 3} {'type': 'loss', 'content': 0.1013999804854393, 'timestamp': '2025-10-01 04:46:36.809994', 'step': 20987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:36.862755', 'step': 20987, 'epoch': 3} {'type': 'loss', 'content': 0.10377722978591919, 'timestamp': '2025-10-01 04:46:36.868449', 'step': 20988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:46:36.920866', 'step': 20988, 'epoch': 3} {'type': 'loss', 'content': 0.037792690098285675, 'timestamp': '2025-10-01 04:46:36.922942', 'step': 20989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:36.975654', 'step': 20989, 'epoch': 3} {'type': 'loss', 'content': 0.05892518535256386, 'timestamp': '2025-10-01 04:46:36.977716', 'step': 20990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:37.030740', 'step': 20990, 'epoch': 3} {'type': 'loss', 'content': 0.1483992636203766, 'timestamp': '2025-10-01 04:46:37.032921', 'step': 20991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:37.086191', 'step': 20991, 'epoch': 3} {'type': 'loss', 'content': 0.05540568754076958, 'timestamp': '2025-10-01 04:46:37.091744', 'step': 20992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:37.144188', 'step': 20992, 'epoch': 3} {'type': 'loss', 'content': 0.05815998837351799, 'timestamp': '2025-10-01 04:46:37.146393', 'step': 20993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:37.215845', 'step': 20993, 'epoch': 3} {'type': 'loss', 'content': 0.0612921379506588, 'timestamp': '2025-10-01 04:46:37.217964', 'step': 20994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:37.270829', 'step': 20994, 'epoch': 3} {'type': 'loss', 'content': 0.0792279914021492, 'timestamp': '2025-10-01 04:46:37.272967', 'step': 20995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:46:37.326295', 'step': 20995, 'epoch': 3} {'type': 'loss', 'content': 0.09036630392074585, 'timestamp': '2025-10-01 04:46:37.331805', 'step': 20996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:37.384628', 'step': 20996, 'epoch': 3} {'type': 'loss', 'content': 0.029141532257199287, 'timestamp': '2025-10-01 04:46:37.386728', 'step': 20997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:37.446809', 'step': 20997, 'epoch': 3} {'type': 'loss', 'content': 0.07845398783683777, 'timestamp': '2025-10-01 04:46:37.448878', 'step': 20998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:37.502090', 'step': 20998, 'epoch': 3} {'type': 'loss', 'content': 0.10488258302211761, 'timestamp': '2025-10-01 04:46:37.504356', 'step': 20999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:37.565344', 'step': 20999, 'epoch': 3} {'type': 'loss', 'content': 0.09119841456413269, 'timestamp': '2025-10-01 04:46:37.571364', 'step': 21000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 21000', 'timestamp': '2025-10-01 04:46:37.942766', 'step': 21000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:37.997998', 'step': 21000, 'epoch': 3} {'type': 'loss', 'content': 0.14918479323387146, 'timestamp': '2025-10-01 04:46:38.000049', 'step': 21001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:38.053620', 'step': 21001, 'epoch': 3} {'type': 'loss', 'content': 0.04814523831009865, 'timestamp': '2025-10-01 04:46:38.055882', 'step': 21002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:38.110259', 'step': 21002, 'epoch': 3} {'type': 'loss', 'content': 0.1060003787279129, 'timestamp': '2025-10-01 04:46:38.112187', 'step': 21003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:38.166295', 'step': 21003, 'epoch': 3} {'type': 'loss', 'content': 0.06250540912151337, 'timestamp': '2025-10-01 04:46:38.172305', 'step': 21004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:38.224497', 'step': 21004, 'epoch': 3} {'type': 'loss', 'content': 0.10244670510292053, 'timestamp': '2025-10-01 04:46:38.226526', 'step': 21005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:38.279942', 'step': 21005, 'epoch': 3} {'type': 'loss', 'content': 0.11154453456401825, 'timestamp': '2025-10-01 04:46:38.282103', 'step': 21006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:38.334805', 'step': 21006, 'epoch': 3} {'type': 'loss', 'content': 0.14023761451244354, 'timestamp': '2025-10-01 04:46:38.337245', 'step': 21007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:38.390397', 'step': 21007, 'epoch': 3} {'type': 'loss', 'content': 0.13222166895866394, 'timestamp': '2025-10-01 04:46:38.396335', 'step': 21008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:38.449670', 'step': 21008, 'epoch': 3} {'type': 'loss', 'content': 0.03616461157798767, 'timestamp': '2025-10-01 04:46:38.452444', 'step': 21009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:38.505404', 'step': 21009, 'epoch': 3} {'type': 'loss', 'content': 0.058919183909893036, 'timestamp': '2025-10-01 04:46:38.507789', 'step': 21010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:38.565279', 'step': 21010, 'epoch': 3} {'type': 'loss', 'content': 0.1079152300953865, 'timestamp': '2025-10-01 04:46:38.567472', 'step': 21011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:38.621044', 'step': 21011, 'epoch': 3} {'type': 'loss', 'content': 0.09951578080654144, 'timestamp': '2025-10-01 04:46:38.627017', 'step': 21012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:38.680106', 'step': 21012, 'epoch': 3} {'type': 'loss', 'content': 0.1096535474061966, 'timestamp': '2025-10-01 04:46:38.682343', 'step': 21013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:38.735337', 'step': 21013, 'epoch': 3} {'type': 'loss', 'content': 0.13676542043685913, 'timestamp': '2025-10-01 04:46:38.737402', 'step': 21014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:38.790866', 'step': 21014, 'epoch': 3} {'type': 'loss', 'content': 0.11349420249462128, 'timestamp': '2025-10-01 04:46:38.793078', 'step': 21015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:38.846115', 'step': 21015, 'epoch': 3} {'type': 'loss', 'content': 0.1263546347618103, 'timestamp': '2025-10-01 04:46:38.852450', 'step': 21016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:38.913478', 'step': 21016, 'epoch': 3} {'type': 'loss', 'content': 0.09732913225889206, 'timestamp': '2025-10-01 04:46:38.923798', 'step': 21017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:38.978295', 'step': 21017, 'epoch': 3} {'type': 'loss', 'content': 0.08965927362442017, 'timestamp': '2025-10-01 04:46:38.980458', 'step': 21018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:39.038254', 'step': 21018, 'epoch': 3} {'type': 'loss', 'content': 0.07960999011993408, 'timestamp': '2025-10-01 04:46:39.040981', 'step': 21019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:39.095351', 'step': 21019, 'epoch': 3} {'type': 'loss', 'content': 0.060600265860557556, 'timestamp': '2025-10-01 04:46:39.101053', 'step': 21020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:39.154197', 'step': 21020, 'epoch': 3} {'type': 'loss', 'content': 0.08537249267101288, 'timestamp': '2025-10-01 04:46:39.157229', 'step': 21021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:39.211121', 'step': 21021, 'epoch': 3} {'type': 'loss', 'content': 0.050168395042419434, 'timestamp': '2025-10-01 04:46:39.213214', 'step': 21022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:39.270430', 'step': 21022, 'epoch': 3} {'type': 'loss', 'content': 0.08948705345392227, 'timestamp': '2025-10-01 04:46:39.272564', 'step': 21023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:46:39.327832', 'step': 21023, 'epoch': 3} {'type': 'loss', 'content': 0.04875969514250755, 'timestamp': '2025-10-01 04:46:39.333749', 'step': 21024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:39.386369', 'step': 21024, 'epoch': 3} {'type': 'loss', 'content': 0.09923474490642548, 'timestamp': '2025-10-01 04:46:39.398029', 'step': 21025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:39.451522', 'step': 21025, 'epoch': 3} {'type': 'loss', 'content': 0.0969301164150238, 'timestamp': '2025-10-01 04:46:39.459200', 'step': 21026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:39.512469', 'step': 21026, 'epoch': 3} {'type': 'loss', 'content': 0.12690886855125427, 'timestamp': '2025-10-01 04:46:39.514567', 'step': 21027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:39.567455', 'step': 21027, 'epoch': 3} {'type': 'loss', 'content': 0.06333202123641968, 'timestamp': '2025-10-01 04:46:39.573126', 'step': 21028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:39.628344', 'step': 21028, 'epoch': 3} {'type': 'loss', 'content': 0.12977632880210876, 'timestamp': '2025-10-01 04:46:39.630672', 'step': 21029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:39.686595', 'step': 21029, 'epoch': 3} {'type': 'loss', 'content': 0.06146254763007164, 'timestamp': '2025-10-01 04:46:39.688913', 'step': 21030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:39.742639', 'step': 21030, 'epoch': 3} {'type': 'loss', 'content': 0.09044571965932846, 'timestamp': '2025-10-01 04:46:39.745156', 'step': 21031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:39.812987', 'step': 21031, 'epoch': 3} {'type': 'loss', 'content': 0.10040023177862167, 'timestamp': '2025-10-01 04:46:39.818729', 'step': 21032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:39.881905', 'step': 21032, 'epoch': 3} {'type': 'loss', 'content': 0.07653188705444336, 'timestamp': '2025-10-01 04:46:39.891371', 'step': 21033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:39.944307', 'step': 21033, 'epoch': 3} {'type': 'loss', 'content': 0.12192384153604507, 'timestamp': '2025-10-01 04:46:39.946379', 'step': 21034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:39.999596', 'step': 21034, 'epoch': 3} {'type': 'loss', 'content': 0.06531503051519394, 'timestamp': '2025-10-01 04:46:40.001732', 'step': 21035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:40.055040', 'step': 21035, 'epoch': 3} {'type': 'loss', 'content': 0.08997808396816254, 'timestamp': '2025-10-01 04:46:40.062199', 'step': 21036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:40.129478', 'step': 21036, 'epoch': 3} {'type': 'loss', 'content': 0.11352533102035522, 'timestamp': '2025-10-01 04:46:40.131783', 'step': 21037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:46:40.186258', 'step': 21037, 'epoch': 3} {'type': 'loss', 'content': 0.056839946657419205, 'timestamp': '2025-10-01 04:46:40.190960', 'step': 21038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:40.253663', 'step': 21038, 'epoch': 3} {'type': 'loss', 'content': 0.05947936326265335, 'timestamp': '2025-10-01 04:46:40.256026', 'step': 21039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:40.309234', 'step': 21039, 'epoch': 3} {'type': 'loss', 'content': 0.16461089253425598, 'timestamp': '2025-10-01 04:46:40.317452', 'step': 21040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:40.379551', 'step': 21040, 'epoch': 3} {'type': 'loss', 'content': 0.09372062236070633, 'timestamp': '2025-10-01 04:46:40.381745', 'step': 21041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:40.435735', 'step': 21041, 'epoch': 3} {'type': 'loss', 'content': 0.11625593155622482, 'timestamp': '2025-10-01 04:46:40.437831', 'step': 21042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:40.491266', 'step': 21042, 'epoch': 3} {'type': 'loss', 'content': 0.11600350588560104, 'timestamp': '2025-10-01 04:46:40.493352', 'step': 21043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:40.546190', 'step': 21043, 'epoch': 3} {'type': 'loss', 'content': 0.0682576447725296, 'timestamp': '2025-10-01 04:46:40.551950', 'step': 21044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:40.604880', 'step': 21044, 'epoch': 3} {'type': 'loss', 'content': 0.10185396671295166, 'timestamp': '2025-10-01 04:46:40.607069', 'step': 21045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:40.660015', 'step': 21045, 'epoch': 3} {'type': 'loss', 'content': 0.11442018300294876, 'timestamp': '2025-10-01 04:46:40.662418', 'step': 21046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:40.716779', 'step': 21046, 'epoch': 3} {'type': 'loss', 'content': 0.04324682056903839, 'timestamp': '2025-10-01 04:46:40.718903', 'step': 21047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:46:40.776279', 'step': 21047, 'epoch': 3} {'type': 'loss', 'content': 0.10251782089471817, 'timestamp': '2025-10-01 04:46:40.782424', 'step': 21048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:40.842747', 'step': 21048, 'epoch': 3} {'type': 'loss', 'content': 0.08081305772066116, 'timestamp': '2025-10-01 04:46:40.845214', 'step': 21049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:40.898245', 'step': 21049, 'epoch': 3} {'type': 'loss', 'content': 0.0760013610124588, 'timestamp': '2025-10-01 04:46:40.900387', 'step': 21050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:40.954083', 'step': 21050, 'epoch': 3} {'type': 'loss', 'content': 0.08346186578273773, 'timestamp': '2025-10-01 04:46:40.956214', 'step': 21051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:41.017228', 'step': 21051, 'epoch': 3} {'type': 'loss', 'content': 0.09050342440605164, 'timestamp': '2025-10-01 04:46:41.023418', 'step': 21052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:41.076497', 'step': 21052, 'epoch': 3} {'type': 'loss', 'content': 0.09882508218288422, 'timestamp': '2025-10-01 04:46:41.078966', 'step': 21053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:41.135101', 'step': 21053, 'epoch': 3} {'type': 'loss', 'content': 0.0427762009203434, 'timestamp': '2025-10-01 04:46:41.137355', 'step': 21054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:41.190738', 'step': 21054, 'epoch': 3} {'type': 'loss', 'content': 0.13168002665042877, 'timestamp': '2025-10-01 04:46:41.192866', 'step': 21055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:41.258788', 'step': 21055, 'epoch': 3} {'type': 'loss', 'content': 0.07363888621330261, 'timestamp': '2025-10-01 04:46:41.264724', 'step': 21056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:41.317105', 'step': 21056, 'epoch': 3} {'type': 'loss', 'content': 0.0853448435664177, 'timestamp': '2025-10-01 04:46:41.319397', 'step': 21057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:41.372476', 'step': 21057, 'epoch': 3} {'type': 'loss', 'content': 0.1363166719675064, 'timestamp': '2025-10-01 04:46:41.376367', 'step': 21058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:41.430271', 'step': 21058, 'epoch': 3} {'type': 'loss', 'content': 0.12123104184865952, 'timestamp': '2025-10-01 04:46:41.432859', 'step': 21059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:41.499220', 'step': 21059, 'epoch': 3} {'type': 'loss', 'content': 0.05037441849708557, 'timestamp': '2025-10-01 04:46:41.505202', 'step': 21060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:41.557690', 'step': 21060, 'epoch': 3} {'type': 'loss', 'content': 0.05251151695847511, 'timestamp': '2025-10-01 04:46:41.560038', 'step': 21061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:41.618530', 'step': 21061, 'epoch': 3} {'type': 'loss', 'content': 0.15281949937343597, 'timestamp': '2025-10-01 04:46:41.620890', 'step': 21062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:41.675217', 'step': 21062, 'epoch': 3} {'type': 'loss', 'content': 0.06369058787822723, 'timestamp': '2025-10-01 04:46:41.677353', 'step': 21063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:41.735891', 'step': 21063, 'epoch': 3} {'type': 'loss', 'content': 0.0586751326918602, 'timestamp': '2025-10-01 04:46:41.754791', 'step': 21064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:41.807773', 'step': 21064, 'epoch': 3} {'type': 'loss', 'content': 0.1544133722782135, 'timestamp': '2025-10-01 04:46:41.810151', 'step': 21065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:41.864368', 'step': 21065, 'epoch': 3} {'type': 'loss', 'content': 0.057729121297597885, 'timestamp': '2025-10-01 04:46:41.866546', 'step': 21066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:41.919978', 'step': 21066, 'epoch': 3} {'type': 'loss', 'content': 0.10546815395355225, 'timestamp': '2025-10-01 04:46:41.922226', 'step': 21067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:41.975837', 'step': 21067, 'epoch': 3} {'type': 'loss', 'content': 0.08507376164197922, 'timestamp': '2025-10-01 04:46:41.982936', 'step': 21068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:42.035570', 'step': 21068, 'epoch': 3} {'type': 'loss', 'content': 0.11769835650920868, 'timestamp': '2025-10-01 04:46:42.037494', 'step': 21069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:42.090196', 'step': 21069, 'epoch': 3} {'type': 'loss', 'content': 0.1075652465224266, 'timestamp': '2025-10-01 04:46:42.094280', 'step': 21070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:42.147111', 'step': 21070, 'epoch': 3} {'type': 'loss', 'content': 0.055190522223711014, 'timestamp': '2025-10-01 04:46:42.149226', 'step': 21071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:42.202151', 'step': 21071, 'epoch': 3} {'type': 'loss', 'content': 0.07168662548065186, 'timestamp': '2025-10-01 04:46:42.207725', 'step': 21072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:42.261242', 'step': 21072, 'epoch': 3} {'type': 'loss', 'content': 0.15328294038772583, 'timestamp': '2025-10-01 04:46:42.264972', 'step': 21073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:42.321159', 'step': 21073, 'epoch': 3} {'type': 'loss', 'content': 0.07789362967014313, 'timestamp': '2025-10-01 04:46:42.323240', 'step': 21074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:42.377543', 'step': 21074, 'epoch': 3} {'type': 'loss', 'content': 0.1313551813364029, 'timestamp': '2025-10-01 04:46:42.384344', 'step': 21075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:42.441528', 'step': 21075, 'epoch': 3} {'type': 'loss', 'content': 0.13968877494335175, 'timestamp': '2025-10-01 04:46:42.448133', 'step': 21076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:46:42.500727', 'step': 21076, 'epoch': 3} {'type': 'loss', 'content': 0.06174642592668533, 'timestamp': '2025-10-01 04:46:42.505815', 'step': 21077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:42.562910', 'step': 21077, 'epoch': 3} {'type': 'loss', 'content': 0.19106434285640717, 'timestamp': '2025-10-01 04:46:42.568400', 'step': 21078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:42.625669', 'step': 21078, 'epoch': 3} {'type': 'loss', 'content': 0.07350881397724152, 'timestamp': '2025-10-01 04:46:42.627775', 'step': 21079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:42.682653', 'step': 21079, 'epoch': 3} {'type': 'loss', 'content': 0.038226183503866196, 'timestamp': '2025-10-01 04:46:42.688711', 'step': 21080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:42.749469', 'step': 21080, 'epoch': 3} {'type': 'loss', 'content': 0.0472218319773674, 'timestamp': '2025-10-01 04:46:42.751629', 'step': 21081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:42.805526', 'step': 21081, 'epoch': 3} {'type': 'loss', 'content': 0.09565172344446182, 'timestamp': '2025-10-01 04:46:42.807840', 'step': 21082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:42.868282', 'step': 21082, 'epoch': 3} {'type': 'loss', 'content': 0.06281843781471252, 'timestamp': '2025-10-01 04:46:42.870434', 'step': 21083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:42.929465', 'step': 21083, 'epoch': 3} {'type': 'loss', 'content': 0.10508101433515549, 'timestamp': '2025-10-01 04:46:42.934942', 'step': 21084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:42.996673', 'step': 21084, 'epoch': 3} {'type': 'loss', 'content': 0.0856894850730896, 'timestamp': '2025-10-01 04:46:42.998813', 'step': 21085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:43.053435', 'step': 21085, 'epoch': 3} {'type': 'loss', 'content': 0.06495941430330276, 'timestamp': '2025-10-01 04:46:43.063648', 'step': 21086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:43.127537', 'step': 21086, 'epoch': 3} {'type': 'loss', 'content': 0.1262359470129013, 'timestamp': '2025-10-01 04:46:43.129616', 'step': 21087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:43.183234', 'step': 21087, 'epoch': 3} {'type': 'loss', 'content': 0.0813770592212677, 'timestamp': '2025-10-01 04:46:43.191618', 'step': 21088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:43.244992', 'step': 21088, 'epoch': 3} {'type': 'loss', 'content': 0.06023307517170906, 'timestamp': '2025-10-01 04:46:43.248654', 'step': 21089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:43.302094', 'step': 21089, 'epoch': 3} {'type': 'loss', 'content': 0.055643677711486816, 'timestamp': '2025-10-01 04:46:43.304869', 'step': 21090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:43.362539', 'step': 21090, 'epoch': 3} {'type': 'loss', 'content': 0.1072307825088501, 'timestamp': '2025-10-01 04:46:43.376136', 'step': 21091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:43.430792', 'step': 21091, 'epoch': 3} {'type': 'loss', 'content': 0.07280778884887695, 'timestamp': '2025-10-01 04:46:43.436392', 'step': 21092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:43.489561', 'step': 21092, 'epoch': 3} {'type': 'loss', 'content': 0.025413155555725098, 'timestamp': '2025-10-01 04:46:43.491661', 'step': 21093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:43.545504', 'step': 21093, 'epoch': 3} {'type': 'loss', 'content': 0.1499996781349182, 'timestamp': '2025-10-01 04:46:43.547658', 'step': 21094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:43.600799', 'step': 21094, 'epoch': 3} {'type': 'loss', 'content': 0.05053698271512985, 'timestamp': '2025-10-01 04:46:43.603102', 'step': 21095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:43.656174', 'step': 21095, 'epoch': 3} {'type': 'loss', 'content': 0.10996270179748535, 'timestamp': '2025-10-01 04:46:43.661991', 'step': 21096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:43.714991', 'step': 21096, 'epoch': 3} {'type': 'loss', 'content': 0.10690655559301376, 'timestamp': '2025-10-01 04:46:43.717276', 'step': 21097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:43.777228', 'step': 21097, 'epoch': 3} {'type': 'loss', 'content': 0.1016659289598465, 'timestamp': '2025-10-01 04:46:43.780035', 'step': 21098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:43.833328', 'step': 21098, 'epoch': 3} {'type': 'loss', 'content': 0.029965076595544815, 'timestamp': '2025-10-01 04:46:43.835384', 'step': 21099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:43.888113', 'step': 21099, 'epoch': 3} {'type': 'loss', 'content': 0.12195568531751633, 'timestamp': '2025-10-01 04:46:43.893844', 'step': 21100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:43.946150', 'step': 21100, 'epoch': 3} {'type': 'loss', 'content': 0.037098612636327744, 'timestamp': '2025-10-01 04:46:43.948203', 'step': 21101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:44.015908', 'step': 21101, 'epoch': 3} {'type': 'loss', 'content': 0.08918504416942596, 'timestamp': '2025-10-01 04:46:44.017978', 'step': 21102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:44.071010', 'step': 21102, 'epoch': 3} {'type': 'loss', 'content': 0.11809134483337402, 'timestamp': '2025-10-01 04:46:44.073140', 'step': 21103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:44.126703', 'step': 21103, 'epoch': 3} {'type': 'loss', 'content': 0.08048982173204422, 'timestamp': '2025-10-01 04:46:44.132518', 'step': 21104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:44.185452', 'step': 21104, 'epoch': 3} {'type': 'loss', 'content': 0.06271160393953323, 'timestamp': '2025-10-01 04:46:44.187571', 'step': 21105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:46:44.241721', 'step': 21105, 'epoch': 3} {'type': 'loss', 'content': 0.05817538872361183, 'timestamp': '2025-10-01 04:46:44.243789', 'step': 21106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:44.297319', 'step': 21106, 'epoch': 3} {'type': 'loss', 'content': 0.10652503371238708, 'timestamp': '2025-10-01 04:46:44.299377', 'step': 21107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:44.353097', 'step': 21107, 'epoch': 3} {'type': 'loss', 'content': 0.06558041274547577, 'timestamp': '2025-10-01 04:46:44.364050', 'step': 21108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:44.416847', 'step': 21108, 'epoch': 3} {'type': 'loss', 'content': 0.0226677767932415, 'timestamp': '2025-10-01 04:46:44.419794', 'step': 21109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:44.476900', 'step': 21109, 'epoch': 3} {'type': 'loss', 'content': 0.13812310993671417, 'timestamp': '2025-10-01 04:46:44.478718', 'step': 21110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:44.531882', 'step': 21110, 'epoch': 3} {'type': 'loss', 'content': 0.0365745946764946, 'timestamp': '2025-10-01 04:46:44.533981', 'step': 21111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:44.587873', 'step': 21111, 'epoch': 3} {'type': 'loss', 'content': 0.16077043116092682, 'timestamp': '2025-10-01 04:46:44.593857', 'step': 21112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:44.646654', 'step': 21112, 'epoch': 3} {'type': 'loss', 'content': 0.08428394049406052, 'timestamp': '2025-10-01 04:46:44.648703', 'step': 21113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:44.701862', 'step': 21113, 'epoch': 3} {'type': 'loss', 'content': 0.16127687692642212, 'timestamp': '2025-10-01 04:46:44.703950', 'step': 21114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:46:44.758232', 'step': 21114, 'epoch': 3} {'type': 'loss', 'content': 0.06588007509708405, 'timestamp': '2025-10-01 04:46:44.760319', 'step': 21115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:44.813765', 'step': 21115, 'epoch': 3} {'type': 'loss', 'content': 0.07898139953613281, 'timestamp': '2025-10-01 04:46:44.819390', 'step': 21116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:44.871571', 'step': 21116, 'epoch': 3} {'type': 'loss', 'content': 0.0965883806347847, 'timestamp': '2025-10-01 04:46:44.873520', 'step': 21117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:46:44.926199', 'step': 21117, 'epoch': 3} {'type': 'loss', 'content': 0.0229299608618021, 'timestamp': '2025-10-01 04:46:44.927831', 'step': 21118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:44.980784', 'step': 21118, 'epoch': 3} {'type': 'loss', 'content': 0.08791494369506836, 'timestamp': '2025-10-01 04:46:44.983114', 'step': 21119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:45.035688', 'step': 21119, 'epoch': 3} {'type': 'loss', 'content': 0.058104317635297775, 'timestamp': '2025-10-01 04:46:45.041475', 'step': 21120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:45.093797', 'step': 21120, 'epoch': 3} {'type': 'loss', 'content': 0.1637616753578186, 'timestamp': '2025-10-01 04:46:45.095883', 'step': 21121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:45.148674', 'step': 21121, 'epoch': 3} {'type': 'loss', 'content': 0.06370625644922256, 'timestamp': '2025-10-01 04:46:45.150739', 'step': 21122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:45.204401', 'step': 21122, 'epoch': 3} {'type': 'loss', 'content': 0.0633295550942421, 'timestamp': '2025-10-01 04:46:45.215715', 'step': 21123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:45.269273', 'step': 21123, 'epoch': 3} {'type': 'loss', 'content': 0.13975614309310913, 'timestamp': '2025-10-01 04:46:45.274808', 'step': 21124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:45.327243', 'step': 21124, 'epoch': 3} {'type': 'loss', 'content': 0.07516410946846008, 'timestamp': '2025-10-01 04:46:45.329353', 'step': 21125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:45.382671', 'step': 21125, 'epoch': 3} {'type': 'loss', 'content': 0.1636972725391388, 'timestamp': '2025-10-01 04:46:45.384962', 'step': 21126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:45.450126', 'step': 21126, 'epoch': 3} {'type': 'loss', 'content': 0.09417518973350525, 'timestamp': '2025-10-01 04:46:45.453234', 'step': 21127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:46:45.506463', 'step': 21127, 'epoch': 3} {'type': 'loss', 'content': 0.060067180544137955, 'timestamp': '2025-10-01 04:46:45.512130', 'step': 21128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:45.565316', 'step': 21128, 'epoch': 3} {'type': 'loss', 'content': 0.10407616943120956, 'timestamp': '2025-10-01 04:46:45.567648', 'step': 21129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:45.620917', 'step': 21129, 'epoch': 3} {'type': 'loss', 'content': 0.1093895211815834, 'timestamp': '2025-10-01 04:46:45.623094', 'step': 21130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:45.677935', 'step': 21130, 'epoch': 3} {'type': 'loss', 'content': 0.1297871172428131, 'timestamp': '2025-10-01 04:46:45.679600', 'step': 21131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:45.732396', 'step': 21131, 'epoch': 3} {'type': 'loss', 'content': 0.10526842623949051, 'timestamp': '2025-10-01 04:46:45.738424', 'step': 21132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:45.791363', 'step': 21132, 'epoch': 3} {'type': 'loss', 'content': 0.12469731271266937, 'timestamp': '2025-10-01 04:46:45.794162', 'step': 21133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:45.847691', 'step': 21133, 'epoch': 3} {'type': 'loss', 'content': 0.08636442571878433, 'timestamp': '2025-10-01 04:46:45.851785', 'step': 21134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:45.915573', 'step': 21134, 'epoch': 3} {'type': 'loss', 'content': 0.056348562240600586, 'timestamp': '2025-10-01 04:46:45.917855', 'step': 21135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:45.970927', 'step': 21135, 'epoch': 3} {'type': 'loss', 'content': 0.04987664893269539, 'timestamp': '2025-10-01 04:46:45.976536', 'step': 21136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:46.029558', 'step': 21136, 'epoch': 3} {'type': 'loss', 'content': 0.0415622815489769, 'timestamp': '2025-10-01 04:46:46.032547', 'step': 21137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:46.086904', 'step': 21137, 'epoch': 3} {'type': 'loss', 'content': 0.13721604645252228, 'timestamp': '2025-10-01 04:46:46.088886', 'step': 21138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:46:46.142721', 'step': 21138, 'epoch': 3} {'type': 'loss', 'content': 0.16377076506614685, 'timestamp': '2025-10-01 04:46:46.145373', 'step': 21139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:46:46.199417', 'step': 21139, 'epoch': 3} {'type': 'loss', 'content': 0.11141357570886612, 'timestamp': '2025-10-01 04:46:46.205214', 'step': 21140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:46.259207', 'step': 21140, 'epoch': 3} {'type': 'loss', 'content': 0.051452815532684326, 'timestamp': '2025-10-01 04:46:46.261350', 'step': 21141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:46:46.314576', 'step': 21141, 'epoch': 3} {'type': 'loss', 'content': 0.06367942690849304, 'timestamp': '2025-10-01 04:46:46.316686', 'step': 21142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:46.370127', 'step': 21142, 'epoch': 3} {'type': 'loss', 'content': 0.05826200544834137, 'timestamp': '2025-10-01 04:46:46.372397', 'step': 21143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:46.425605', 'step': 21143, 'epoch': 3} {'type': 'loss', 'content': 0.08058449625968933, 'timestamp': '2025-10-01 04:46:46.431043', 'step': 21144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:46.484161', 'step': 21144, 'epoch': 3} {'type': 'loss', 'content': 0.043533410876989365, 'timestamp': '2025-10-01 04:46:46.486083', 'step': 21145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:46.539484', 'step': 21145, 'epoch': 3} {'type': 'loss', 'content': 0.07619833946228027, 'timestamp': '2025-10-01 04:46:46.541678', 'step': 21146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:46.596775', 'step': 21146, 'epoch': 3} {'type': 'loss', 'content': 0.13784489035606384, 'timestamp': '2025-10-01 04:46:46.599701', 'step': 21147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:46.653652', 'step': 21147, 'epoch': 3} {'type': 'loss', 'content': 0.1515689492225647, 'timestamp': '2025-10-01 04:46:46.659546', 'step': 21148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:46.714710', 'step': 21148, 'epoch': 3} {'type': 'loss', 'content': 0.07414194196462631, 'timestamp': '2025-10-01 04:46:46.716804', 'step': 21149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:46.770321', 'step': 21149, 'epoch': 3} {'type': 'loss', 'content': 0.06548242270946503, 'timestamp': '2025-10-01 04:46:46.772358', 'step': 21150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:46.844418', 'step': 21150, 'epoch': 3} {'type': 'loss', 'content': 0.05974726378917694, 'timestamp': '2025-10-01 04:46:46.846475', 'step': 21151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:46.900734', 'step': 21151, 'epoch': 3} {'type': 'loss', 'content': 0.11654799431562424, 'timestamp': '2025-10-01 04:46:46.918073', 'step': 21152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:46.970941', 'step': 21152, 'epoch': 3} {'type': 'loss', 'content': 0.07198294252157211, 'timestamp': '2025-10-01 04:46:46.972905', 'step': 21153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:47.026228', 'step': 21153, 'epoch': 3} {'type': 'loss', 'content': 0.10939401388168335, 'timestamp': '2025-10-01 04:46:47.028707', 'step': 21154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:47.093429', 'step': 21154, 'epoch': 3} {'type': 'loss', 'content': 0.09697840362787247, 'timestamp': '2025-10-01 04:46:47.095593', 'step': 21155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:47.149651', 'step': 21155, 'epoch': 3} {'type': 'loss', 'content': 0.0787375196814537, 'timestamp': '2025-10-01 04:46:47.156818', 'step': 21156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:47.210506', 'step': 21156, 'epoch': 3} {'type': 'loss', 'content': 0.10226991027593613, 'timestamp': '2025-10-01 04:46:47.219739', 'step': 21157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:47.273624', 'step': 21157, 'epoch': 3} {'type': 'loss', 'content': 0.06196007877588272, 'timestamp': '2025-10-01 04:46:47.282242', 'step': 21158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:47.335736', 'step': 21158, 'epoch': 3} {'type': 'loss', 'content': 0.11023285239934921, 'timestamp': '2025-10-01 04:46:47.343113', 'step': 21159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:47.396736', 'step': 21159, 'epoch': 3} {'type': 'loss', 'content': 0.14248676598072052, 'timestamp': '2025-10-01 04:46:47.402204', 'step': 21160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:47.454890', 'step': 21160, 'epoch': 3} {'type': 'loss', 'content': 0.07638202607631683, 'timestamp': '2025-10-01 04:46:47.457344', 'step': 21161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:47.510824', 'step': 21161, 'epoch': 3} {'type': 'loss', 'content': 0.17341779172420502, 'timestamp': '2025-10-01 04:46:47.513645', 'step': 21162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:47.567353', 'step': 21162, 'epoch': 3} {'type': 'loss', 'content': 0.07097779214382172, 'timestamp': '2025-10-01 04:46:47.569639', 'step': 21163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:47.623499', 'step': 21163, 'epoch': 3} {'type': 'loss', 'content': 0.073470339179039, 'timestamp': '2025-10-01 04:46:47.629507', 'step': 21164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:47.682787', 'step': 21164, 'epoch': 3} {'type': 'loss', 'content': 0.04201190918684006, 'timestamp': '2025-10-01 04:46:47.684759', 'step': 21165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:47.738460', 'step': 21165, 'epoch': 3} {'type': 'loss', 'content': 0.05207120627164841, 'timestamp': '2025-10-01 04:46:47.746772', 'step': 21166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:47.806820', 'step': 21166, 'epoch': 3} {'type': 'loss', 'content': 0.11401259899139404, 'timestamp': '2025-10-01 04:46:47.808841', 'step': 21167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:47.861908', 'step': 21167, 'epoch': 3} {'type': 'loss', 'content': 0.03640421852469444, 'timestamp': '2025-10-01 04:46:47.868138', 'step': 21168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:47.922560', 'step': 21168, 'epoch': 3} {'type': 'loss', 'content': 0.06498299539089203, 'timestamp': '2025-10-01 04:46:47.924709', 'step': 21169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:47.978375', 'step': 21169, 'epoch': 3} {'type': 'loss', 'content': 0.1538885235786438, 'timestamp': '2025-10-01 04:46:47.990323', 'step': 21170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:48.043798', 'step': 21170, 'epoch': 3} {'type': 'loss', 'content': 0.0738089382648468, 'timestamp': '2025-10-01 04:46:48.046419', 'step': 21171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:48.099567', 'step': 21171, 'epoch': 3} {'type': 'loss', 'content': 0.05203148350119591, 'timestamp': '2025-10-01 04:46:48.104908', 'step': 21172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:48.157626', 'step': 21172, 'epoch': 3} {'type': 'loss', 'content': 0.04039756581187248, 'timestamp': '2025-10-01 04:46:48.159457', 'step': 21173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:48.212957', 'step': 21173, 'epoch': 3} {'type': 'loss', 'content': 0.19277916848659515, 'timestamp': '2025-10-01 04:46:48.215091', 'step': 21174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:48.268920', 'step': 21174, 'epoch': 3} {'type': 'loss', 'content': 0.10100865364074707, 'timestamp': '2025-10-01 04:46:48.271145', 'step': 21175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:46:48.324810', 'step': 21175, 'epoch': 3} {'type': 'loss', 'content': 0.07922928035259247, 'timestamp': '2025-10-01 04:46:48.330571', 'step': 21176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:48.383850', 'step': 21176, 'epoch': 3} {'type': 'loss', 'content': 0.06832946091890335, 'timestamp': '2025-10-01 04:46:48.393445', 'step': 21177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:48.446849', 'step': 21177, 'epoch': 3} {'type': 'loss', 'content': 0.12428480386734009, 'timestamp': '2025-10-01 04:46:48.449435', 'step': 21178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:46:48.504836', 'step': 21178, 'epoch': 3} {'type': 'loss', 'content': 0.1567544937133789, 'timestamp': '2025-10-01 04:46:48.507143', 'step': 21179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:48.564138', 'step': 21179, 'epoch': 3} {'type': 'loss', 'content': 0.07568801939487457, 'timestamp': '2025-10-01 04:46:48.570050', 'step': 21180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:48.623958', 'step': 21180, 'epoch': 3} {'type': 'loss', 'content': 0.033710286021232605, 'timestamp': '2025-10-01 04:46:48.626457', 'step': 21181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:48.680784', 'step': 21181, 'epoch': 3} {'type': 'loss', 'content': 0.03026946447789669, 'timestamp': '2025-10-01 04:46:48.684075', 'step': 21182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:48.761277', 'step': 21182, 'epoch': 3} {'type': 'loss', 'content': 0.08233828842639923, 'timestamp': '2025-10-01 04:46:48.763554', 'step': 21183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:48.818826', 'step': 21183, 'epoch': 3} {'type': 'loss', 'content': 0.14447590708732605, 'timestamp': '2025-10-01 04:46:48.825028', 'step': 21184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:48.879136', 'step': 21184, 'epoch': 3} {'type': 'loss', 'content': 0.08014398068189621, 'timestamp': '2025-10-01 04:46:48.881369', 'step': 21185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:48.935192', 'step': 21185, 'epoch': 3} {'type': 'loss', 'content': 0.12357030063867569, 'timestamp': '2025-10-01 04:46:48.937327', 'step': 21186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:48.991055', 'step': 21186, 'epoch': 3} {'type': 'loss', 'content': 0.0850532203912735, 'timestamp': '2025-10-01 04:46:48.993183', 'step': 21187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:49.046596', 'step': 21187, 'epoch': 3} {'type': 'loss', 'content': 0.09903524816036224, 'timestamp': '2025-10-01 04:46:49.053673', 'step': 21188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:49.118854', 'step': 21188, 'epoch': 3} {'type': 'loss', 'content': 0.10609864443540573, 'timestamp': '2025-10-01 04:46:49.121415', 'step': 21189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:49.175288', 'step': 21189, 'epoch': 3} {'type': 'loss', 'content': 0.11385536193847656, 'timestamp': '2025-10-01 04:46:49.177384', 'step': 21190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:49.231052', 'step': 21190, 'epoch': 3} {'type': 'loss', 'content': 0.08125697821378708, 'timestamp': '2025-10-01 04:46:49.233713', 'step': 21191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:49.287681', 'step': 21191, 'epoch': 3} {'type': 'loss', 'content': 0.06712257117033005, 'timestamp': '2025-10-01 04:46:49.293559', 'step': 21192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:49.346781', 'step': 21192, 'epoch': 3} {'type': 'loss', 'content': 0.054213982075452805, 'timestamp': '2025-10-01 04:46:49.349095', 'step': 21193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:49.402082', 'step': 21193, 'epoch': 3} {'type': 'loss', 'content': 0.13935726881027222, 'timestamp': '2025-10-01 04:46:49.404171', 'step': 21194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:49.457528', 'step': 21194, 'epoch': 3} {'type': 'loss', 'content': 0.0584125854074955, 'timestamp': '2025-10-01 04:46:49.459670', 'step': 21195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:49.513339', 'step': 21195, 'epoch': 3} {'type': 'loss', 'content': 0.17319828271865845, 'timestamp': '2025-10-01 04:46:49.519324', 'step': 21196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:49.572215', 'step': 21196, 'epoch': 3} {'type': 'loss', 'content': 0.03673025593161583, 'timestamp': '2025-10-01 04:46:49.574557', 'step': 21197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:49.628107', 'step': 21197, 'epoch': 3} {'type': 'loss', 'content': 0.13034610450267792, 'timestamp': '2025-10-01 04:46:49.630302', 'step': 21198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:49.683831', 'step': 21198, 'epoch': 3} {'type': 'loss', 'content': 0.10598330199718475, 'timestamp': '2025-10-01 04:46:49.686028', 'step': 21199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:46:49.739631', 'step': 21199, 'epoch': 3} {'type': 'loss', 'content': 0.07379598915576935, 'timestamp': '2025-10-01 04:46:49.745458', 'step': 21200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:49.798986', 'step': 21200, 'epoch': 3} {'type': 'loss', 'content': 0.1191883459687233, 'timestamp': '2025-10-01 04:46:49.810254', 'step': 21201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:49.863872', 'step': 21201, 'epoch': 3} {'type': 'loss', 'content': 0.04139319434762001, 'timestamp': '2025-10-01 04:46:49.866000', 'step': 21202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:49.919526', 'step': 21202, 'epoch': 3} {'type': 'loss', 'content': 0.1633051633834839, 'timestamp': '2025-10-01 04:46:49.921737', 'step': 21203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:49.975748', 'step': 21203, 'epoch': 3} {'type': 'loss', 'content': 0.06637290865182877, 'timestamp': '2025-10-01 04:46:49.981486', 'step': 21204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:50.034939', 'step': 21204, 'epoch': 3} {'type': 'loss', 'content': 0.11690735071897507, 'timestamp': '2025-10-01 04:46:50.037072', 'step': 21205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:50.090109', 'step': 21205, 'epoch': 3} {'type': 'loss', 'content': 0.1125296801328659, 'timestamp': '2025-10-01 04:46:50.092223', 'step': 21206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:50.157198', 'step': 21206, 'epoch': 3} {'type': 'loss', 'content': 0.09096044301986694, 'timestamp': '2025-10-01 04:46:50.159348', 'step': 21207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:50.212935', 'step': 21207, 'epoch': 3} {'type': 'loss', 'content': 0.13229575753211975, 'timestamp': '2025-10-01 04:46:50.218762', 'step': 21208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:50.271633', 'step': 21208, 'epoch': 3} {'type': 'loss', 'content': 0.08866947144269943, 'timestamp': '2025-10-01 04:46:50.273831', 'step': 21209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:50.327228', 'step': 21209, 'epoch': 3} {'type': 'loss', 'content': 0.11343827098608017, 'timestamp': '2025-10-01 04:46:50.329526', 'step': 21210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:50.383527', 'step': 21210, 'epoch': 3} {'type': 'loss', 'content': 0.11752454936504364, 'timestamp': '2025-10-01 04:46:50.385837', 'step': 21211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:50.439346', 'step': 21211, 'epoch': 3} {'type': 'loss', 'content': 0.16801796853542328, 'timestamp': '2025-10-01 04:46:50.445050', 'step': 21212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:46:50.515070', 'step': 21212, 'epoch': 3} {'type': 'loss', 'content': 0.039671946316957474, 'timestamp': '2025-10-01 04:46:50.517249', 'step': 21213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:50.580902', 'step': 21213, 'epoch': 3} {'type': 'loss', 'content': 0.1941695511341095, 'timestamp': '2025-10-01 04:46:50.586503', 'step': 21214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:50.640827', 'step': 21214, 'epoch': 3} {'type': 'loss', 'content': 0.1688975691795349, 'timestamp': '2025-10-01 04:46:50.642962', 'step': 21215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:50.696298', 'step': 21215, 'epoch': 3} {'type': 'loss', 'content': 0.06168028339743614, 'timestamp': '2025-10-01 04:46:50.702143', 'step': 21216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:50.755016', 'step': 21216, 'epoch': 3} {'type': 'loss', 'content': 0.06672787666320801, 'timestamp': '2025-10-01 04:46:50.758504', 'step': 21217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:50.812466', 'step': 21217, 'epoch': 3} {'type': 'loss', 'content': 0.1214057058095932, 'timestamp': '2025-10-01 04:46:50.814603', 'step': 21218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:50.877051', 'step': 21218, 'epoch': 3} {'type': 'loss', 'content': 0.07281579077243805, 'timestamp': '2025-10-01 04:46:50.879208', 'step': 21219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:50.933408', 'step': 21219, 'epoch': 3} {'type': 'loss', 'content': 0.14442569017410278, 'timestamp': '2025-10-01 04:46:50.939541', 'step': 21220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:50.992206', 'step': 21220, 'epoch': 3} {'type': 'loss', 'content': 0.09954340755939484, 'timestamp': '2025-10-01 04:46:50.994266', 'step': 21221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:51.047817', 'step': 21221, 'epoch': 3} {'type': 'loss', 'content': 0.030765391886234283, 'timestamp': '2025-10-01 04:46:51.050225', 'step': 21222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:51.113788', 'step': 21222, 'epoch': 3} {'type': 'loss', 'content': 0.0404914952814579, 'timestamp': '2025-10-01 04:46:51.116433', 'step': 21223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:51.174325', 'step': 21223, 'epoch': 3} {'type': 'loss', 'content': 0.04816664382815361, 'timestamp': '2025-10-01 04:46:51.180636', 'step': 21224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:46:51.234244', 'step': 21224, 'epoch': 3} {'type': 'loss', 'content': 0.11314099282026291, 'timestamp': '2025-10-01 04:46:51.236595', 'step': 21225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:51.290390', 'step': 21225, 'epoch': 3} {'type': 'loss', 'content': 0.07583727687597275, 'timestamp': '2025-10-01 04:46:51.297836', 'step': 21226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:51.351508', 'step': 21226, 'epoch': 3} {'type': 'loss', 'content': 0.15023073554039001, 'timestamp': '2025-10-01 04:46:51.353730', 'step': 21227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:46:51.407565', 'step': 21227, 'epoch': 3} {'type': 'loss', 'content': 0.04399396851658821, 'timestamp': '2025-10-01 04:46:51.413889', 'step': 21228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:51.468897', 'step': 21228, 'epoch': 3} {'type': 'loss', 'content': 0.05579502135515213, 'timestamp': '2025-10-01 04:46:51.471010', 'step': 21229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:51.524236', 'step': 21229, 'epoch': 3} {'type': 'loss', 'content': 0.09469743072986603, 'timestamp': '2025-10-01 04:46:51.526452', 'step': 21230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:51.579908', 'step': 21230, 'epoch': 3} {'type': 'loss', 'content': 0.07774616777896881, 'timestamp': '2025-10-01 04:46:51.582118', 'step': 21231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:51.635437', 'step': 21231, 'epoch': 3} {'type': 'loss', 'content': 0.020255662500858307, 'timestamp': '2025-10-01 04:46:51.641346', 'step': 21232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:51.694060', 'step': 21232, 'epoch': 3} {'type': 'loss', 'content': 0.1192917451262474, 'timestamp': '2025-10-01 04:46:51.696560', 'step': 21233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:51.750890', 'step': 21233, 'epoch': 3} {'type': 'loss', 'content': 0.10453607887029648, 'timestamp': '2025-10-01 04:46:51.753305', 'step': 21234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:51.808612', 'step': 21234, 'epoch': 3} {'type': 'loss', 'content': 0.1866276115179062, 'timestamp': '2025-10-01 04:46:51.810901', 'step': 21235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:51.865190', 'step': 21235, 'epoch': 3} {'type': 'loss', 'content': 0.13941989839076996, 'timestamp': '2025-10-01 04:46:51.871876', 'step': 21236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:51.926214', 'step': 21236, 'epoch': 3} {'type': 'loss', 'content': 0.13047964870929718, 'timestamp': '2025-10-01 04:46:51.928754', 'step': 21237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:51.983357', 'step': 21237, 'epoch': 3} {'type': 'loss', 'content': 0.20227092504501343, 'timestamp': '2025-10-01 04:46:51.985785', 'step': 21238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:52.040341', 'step': 21238, 'epoch': 3} {'type': 'loss', 'content': 0.19638493657112122, 'timestamp': '2025-10-01 04:46:52.042777', 'step': 21239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:52.097397', 'step': 21239, 'epoch': 3} {'type': 'loss', 'content': 0.07478167861700058, 'timestamp': '2025-10-01 04:46:52.103736', 'step': 21240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:52.158110', 'step': 21240, 'epoch': 3} {'type': 'loss', 'content': 0.11105763912200928, 'timestamp': '2025-10-01 04:46:52.160605', 'step': 21241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:52.214096', 'step': 21241, 'epoch': 3} {'type': 'loss', 'content': 0.21076899766921997, 'timestamp': '2025-10-01 04:46:52.226005', 'step': 21242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:52.281433', 'step': 21242, 'epoch': 3} {'type': 'loss', 'content': 0.10169254243373871, 'timestamp': '2025-10-01 04:46:52.284062', 'step': 21243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:52.338452', 'step': 21243, 'epoch': 3} {'type': 'loss', 'content': 0.07530148327350616, 'timestamp': '2025-10-01 04:46:52.344469', 'step': 21244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:52.399813', 'step': 21244, 'epoch': 3} {'type': 'loss', 'content': 0.1320534497499466, 'timestamp': '2025-10-01 04:46:52.402557', 'step': 21245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:52.456303', 'step': 21245, 'epoch': 3} {'type': 'loss', 'content': 0.04277889430522919, 'timestamp': '2025-10-01 04:46:52.458781', 'step': 21246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:52.512908', 'step': 21246, 'epoch': 3} {'type': 'loss', 'content': 0.047437336295843124, 'timestamp': '2025-10-01 04:46:52.515400', 'step': 21247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:52.569708', 'step': 21247, 'epoch': 3} {'type': 'loss', 'content': 0.047951940447092056, 'timestamp': '2025-10-01 04:46:52.575675', 'step': 21248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:52.629170', 'step': 21248, 'epoch': 3} {'type': 'loss', 'content': 0.07725998014211655, 'timestamp': '2025-10-01 04:46:52.631279', 'step': 21249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:52.685488', 'step': 21249, 'epoch': 3} {'type': 'loss', 'content': 0.10043003410100937, 'timestamp': '2025-10-01 04:46:52.687707', 'step': 21250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:52.742029', 'step': 21250, 'epoch': 3} {'type': 'loss', 'content': 0.11067507416009903, 'timestamp': '2025-10-01 04:46:52.744564', 'step': 21251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:52.808905', 'step': 21251, 'epoch': 3} {'type': 'loss', 'content': 0.05795268714427948, 'timestamp': '2025-10-01 04:46:52.815190', 'step': 21252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:46:52.880383', 'step': 21252, 'epoch': 3} {'type': 'loss', 'content': 0.046368736773729324, 'timestamp': '2025-10-01 04:46:52.882995', 'step': 21253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:52.937559', 'step': 21253, 'epoch': 3} {'type': 'loss', 'content': 0.13263718783855438, 'timestamp': '2025-10-01 04:46:52.939976', 'step': 21254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:52.994993', 'step': 21254, 'epoch': 3} {'type': 'loss', 'content': 0.08989936858415604, 'timestamp': '2025-10-01 04:46:52.997420', 'step': 21255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:53.052420', 'step': 21255, 'epoch': 3} {'type': 'loss', 'content': 0.1220429539680481, 'timestamp': '2025-10-01 04:46:53.058288', 'step': 21256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:53.113594', 'step': 21256, 'epoch': 3} {'type': 'loss', 'content': 0.07890962064266205, 'timestamp': '2025-10-01 04:46:53.121340', 'step': 21257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:53.177058', 'step': 21257, 'epoch': 3} {'type': 'loss', 'content': 0.08969315141439438, 'timestamp': '2025-10-01 04:46:53.179280', 'step': 21258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:53.235037', 'step': 21258, 'epoch': 3} {'type': 'loss', 'content': 0.09608074277639389, 'timestamp': '2025-10-01 04:46:53.237189', 'step': 21259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:53.291491', 'step': 21259, 'epoch': 3} {'type': 'loss', 'content': 0.081646628677845, 'timestamp': '2025-10-01 04:46:53.297838', 'step': 21260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:53.351697', 'step': 21260, 'epoch': 3} {'type': 'loss', 'content': 0.08608099073171616, 'timestamp': '2025-10-01 04:46:53.353823', 'step': 21261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:53.410931', 'step': 21261, 'epoch': 3} {'type': 'loss', 'content': 0.10013855993747711, 'timestamp': '2025-10-01 04:46:53.420561', 'step': 21262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:53.475549', 'step': 21262, 'epoch': 3} {'type': 'loss', 'content': 0.14409293234348297, 'timestamp': '2025-10-01 04:46:53.477771', 'step': 21263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:53.531756', 'step': 21263, 'epoch': 3} {'type': 'loss', 'content': 0.04692776873707771, 'timestamp': '2025-10-01 04:46:53.538198', 'step': 21264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:53.591893', 'step': 21264, 'epoch': 3} {'type': 'loss', 'content': 0.15569020807743073, 'timestamp': '2025-10-01 04:46:53.599090', 'step': 21265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:53.667072', 'step': 21265, 'epoch': 3} {'type': 'loss', 'content': 0.05427849665284157, 'timestamp': '2025-10-01 04:46:53.669225', 'step': 21266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:53.723623', 'step': 21266, 'epoch': 3} {'type': 'loss', 'content': 0.13067737221717834, 'timestamp': '2025-10-01 04:46:53.725842', 'step': 21267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:53.790317', 'step': 21267, 'epoch': 3} {'type': 'loss', 'content': 0.0673036277294159, 'timestamp': '2025-10-01 04:46:53.796558', 'step': 21268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:53.853329', 'step': 21268, 'epoch': 3} {'type': 'loss', 'content': 0.05319979414343834, 'timestamp': '2025-10-01 04:46:53.855523', 'step': 21269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:53.910713', 'step': 21269, 'epoch': 3} {'type': 'loss', 'content': 0.11844244599342346, 'timestamp': '2025-10-01 04:46:53.912931', 'step': 21270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:53.966201', 'step': 21270, 'epoch': 3} {'type': 'loss', 'content': 0.07002684473991394, 'timestamp': '2025-10-01 04:46:53.968407', 'step': 21271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:46:54.022503', 'step': 21271, 'epoch': 3} {'type': 'loss', 'content': 0.06265706568956375, 'timestamp': '2025-10-01 04:46:54.028485', 'step': 21272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:54.081627', 'step': 21272, 'epoch': 3} {'type': 'loss', 'content': 0.06523448973894119, 'timestamp': '2025-10-01 04:46:54.083790', 'step': 21273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:54.139152', 'step': 21273, 'epoch': 3} {'type': 'loss', 'content': 0.10825029015541077, 'timestamp': '2025-10-01 04:46:54.151575', 'step': 21274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:54.206061', 'step': 21274, 'epoch': 3} {'type': 'loss', 'content': 0.1326785534620285, 'timestamp': '2025-10-01 04:46:54.208282', 'step': 21275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:54.264702', 'step': 21275, 'epoch': 3} {'type': 'loss', 'content': 0.07850340008735657, 'timestamp': '2025-10-01 04:46:54.270608', 'step': 21276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:46:54.325559', 'step': 21276, 'epoch': 3} {'type': 'loss', 'content': 0.06844265758991241, 'timestamp': '2025-10-01 04:46:54.327736', 'step': 21277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:54.382320', 'step': 21277, 'epoch': 3} {'type': 'loss', 'content': 0.14157314598560333, 'timestamp': '2025-10-01 04:46:54.385328', 'step': 21278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:54.438976', 'step': 21278, 'epoch': 3} {'type': 'loss', 'content': 0.10158208012580872, 'timestamp': '2025-10-01 04:46:54.448471', 'step': 21279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:54.504921', 'step': 21279, 'epoch': 3} {'type': 'loss', 'content': 0.19477589428424835, 'timestamp': '2025-10-01 04:46:54.512732', 'step': 21280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:54.572918', 'step': 21280, 'epoch': 3} {'type': 'loss', 'content': 0.06693320721387863, 'timestamp': '2025-10-01 04:46:54.580569', 'step': 21281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:54.636456', 'step': 21281, 'epoch': 3} {'type': 'loss', 'content': 0.07933568954467773, 'timestamp': '2025-10-01 04:46:54.638719', 'step': 21282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:54.695350', 'step': 21282, 'epoch': 3} {'type': 'loss', 'content': 0.07312259078025818, 'timestamp': '2025-10-01 04:46:54.697800', 'step': 21283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:54.764575', 'step': 21283, 'epoch': 3} {'type': 'loss', 'content': 0.16481295228004456, 'timestamp': '2025-10-01 04:46:54.770599', 'step': 21284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:54.841087', 'step': 21284, 'epoch': 3} {'type': 'loss', 'content': 0.06114065274596214, 'timestamp': '2025-10-01 04:46:54.843665', 'step': 21285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:54.897072', 'step': 21285, 'epoch': 3} {'type': 'loss', 'content': 0.08232320845127106, 'timestamp': '2025-10-01 04:46:54.910875', 'step': 21286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:54.967800', 'step': 21286, 'epoch': 3} {'type': 'loss', 'content': 0.07450787723064423, 'timestamp': '2025-10-01 04:46:54.970136', 'step': 21287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:55.034622', 'step': 21287, 'epoch': 3} {'type': 'loss', 'content': 0.08760417997837067, 'timestamp': '2025-10-01 04:46:55.041248', 'step': 21288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:46:55.094755', 'step': 21288, 'epoch': 3} {'type': 'loss', 'content': 0.0926932767033577, 'timestamp': '2025-10-01 04:46:55.097070', 'step': 21289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:55.151455', 'step': 21289, 'epoch': 3} {'type': 'loss', 'content': 0.1417168825864792, 'timestamp': '2025-10-01 04:46:55.155098', 'step': 21290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:55.211888', 'step': 21290, 'epoch': 3} {'type': 'loss', 'content': 0.09279131144285202, 'timestamp': '2025-10-01 04:46:55.235025', 'step': 21291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:55.289683', 'step': 21291, 'epoch': 3} {'type': 'loss', 'content': 0.05858055129647255, 'timestamp': '2025-10-01 04:46:55.295730', 'step': 21292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:55.349595', 'step': 21292, 'epoch': 3} {'type': 'loss', 'content': 0.06742848455905914, 'timestamp': '2025-10-01 04:46:55.361127', 'step': 21293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:55.418126', 'step': 21293, 'epoch': 3} {'type': 'loss', 'content': 0.06517957150936127, 'timestamp': '2025-10-01 04:46:55.420792', 'step': 21294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:55.489740', 'step': 21294, 'epoch': 3} {'type': 'loss', 'content': 0.06556875258684158, 'timestamp': '2025-10-01 04:46:55.491942', 'step': 21295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:55.548304', 'step': 21295, 'epoch': 3} {'type': 'loss', 'content': 0.10286790132522583, 'timestamp': '2025-10-01 04:46:55.565403', 'step': 21296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:55.624128', 'step': 21296, 'epoch': 3} {'type': 'loss', 'content': 0.06996916979551315, 'timestamp': '2025-10-01 04:46:55.627579', 'step': 21297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:55.683438', 'step': 21297, 'epoch': 3} {'type': 'loss', 'content': 0.08847183734178543, 'timestamp': '2025-10-01 04:46:55.685662', 'step': 21298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:46:55.744674', 'step': 21298, 'epoch': 3} {'type': 'loss', 'content': 0.10913337022066116, 'timestamp': '2025-10-01 04:46:55.746779', 'step': 21299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:55.801324', 'step': 21299, 'epoch': 3} {'type': 'loss', 'content': 0.11442026495933533, 'timestamp': '2025-10-01 04:46:55.807505', 'step': 21300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:55.860557', 'step': 21300, 'epoch': 3} {'type': 'loss', 'content': 0.06053415685892105, 'timestamp': '2025-10-01 04:46:55.862655', 'step': 21301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:46:55.916365', 'step': 21301, 'epoch': 3} {'type': 'loss', 'content': 0.09229297190904617, 'timestamp': '2025-10-01 04:46:55.927350', 'step': 21302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:56.004315', 'step': 21302, 'epoch': 3} {'type': 'loss', 'content': 0.050876252353191376, 'timestamp': '2025-10-01 04:46:56.006581', 'step': 21303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:56.060618', 'step': 21303, 'epoch': 3} {'type': 'loss', 'content': 0.07325652241706848, 'timestamp': '2025-10-01 04:46:56.066603', 'step': 21304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:56.120238', 'step': 21304, 'epoch': 3} {'type': 'loss', 'content': 0.07908330112695694, 'timestamp': '2025-10-01 04:46:56.122402', 'step': 21305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:56.175911', 'step': 21305, 'epoch': 3} {'type': 'loss', 'content': 0.17567896842956543, 'timestamp': '2025-10-01 04:46:56.178168', 'step': 21306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:56.232207', 'step': 21306, 'epoch': 3} {'type': 'loss', 'content': 0.16051055490970612, 'timestamp': '2025-10-01 04:46:56.234409', 'step': 21307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:56.288025', 'step': 21307, 'epoch': 3} {'type': 'loss', 'content': 0.15558519959449768, 'timestamp': '2025-10-01 04:46:56.293978', 'step': 21308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:56.347803', 'step': 21308, 'epoch': 3} {'type': 'loss', 'content': 0.09017173945903778, 'timestamp': '2025-10-01 04:46:56.351294', 'step': 21309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:56.406365', 'step': 21309, 'epoch': 3} {'type': 'loss', 'content': 0.07802734524011612, 'timestamp': '2025-10-01 04:46:56.408740', 'step': 21310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:56.471968', 'step': 21310, 'epoch': 3} {'type': 'loss', 'content': 0.07200340181589127, 'timestamp': '2025-10-01 04:46:56.474288', 'step': 21311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:56.529339', 'step': 21311, 'epoch': 3} {'type': 'loss', 'content': 0.1662062555551529, 'timestamp': '2025-10-01 04:46:56.535982', 'step': 21312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:56.590406', 'step': 21312, 'epoch': 3} {'type': 'loss', 'content': 0.07181666791439056, 'timestamp': '2025-10-01 04:46:56.592545', 'step': 21313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:56.647641', 'step': 21313, 'epoch': 3} {'type': 'loss', 'content': 0.13730747997760773, 'timestamp': '2025-10-01 04:46:56.650263', 'step': 21314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:56.712901', 'step': 21314, 'epoch': 3} {'type': 'loss', 'content': 0.05778762325644493, 'timestamp': '2025-10-01 04:46:56.715006', 'step': 21315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:56.769809', 'step': 21315, 'epoch': 3} {'type': 'loss', 'content': 0.11428561061620712, 'timestamp': '2025-10-01 04:46:56.775981', 'step': 21316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:56.829424', 'step': 21316, 'epoch': 3} {'type': 'loss', 'content': 0.06905441731214523, 'timestamp': '2025-10-01 04:46:56.831568', 'step': 21317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:56.899374', 'step': 21317, 'epoch': 3} {'type': 'loss', 'content': 0.1230970174074173, 'timestamp': '2025-10-01 04:46:56.901531', 'step': 21318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:56.954955', 'step': 21318, 'epoch': 3} {'type': 'loss', 'content': 0.09222044795751572, 'timestamp': '2025-10-01 04:46:56.957642', 'step': 21319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:57.011540', 'step': 21319, 'epoch': 3} {'type': 'loss', 'content': 0.06926680356264114, 'timestamp': '2025-10-01 04:46:57.023256', 'step': 21320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:57.083107', 'step': 21320, 'epoch': 3} {'type': 'loss', 'content': 0.11690961569547653, 'timestamp': '2025-10-01 04:46:57.085229', 'step': 21321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:57.138450', 'step': 21321, 'epoch': 3} {'type': 'loss', 'content': 0.1599707156419754, 'timestamp': '2025-10-01 04:46:57.140691', 'step': 21322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:57.195267', 'step': 21322, 'epoch': 3} {'type': 'loss', 'content': 0.12206917256116867, 'timestamp': '2025-10-01 04:46:57.197387', 'step': 21323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:57.251546', 'step': 21323, 'epoch': 3} {'type': 'loss', 'content': 0.10072047263383865, 'timestamp': '2025-10-01 04:46:57.257455', 'step': 21324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:57.310681', 'step': 21324, 'epoch': 3} {'type': 'loss', 'content': 0.08635646104812622, 'timestamp': '2025-10-01 04:46:57.313011', 'step': 21325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:57.380154', 'step': 21325, 'epoch': 3} {'type': 'loss', 'content': 0.05062016099691391, 'timestamp': '2025-10-01 04:46:57.382460', 'step': 21326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:46:57.437042', 'step': 21326, 'epoch': 3} {'type': 'loss', 'content': 0.1047263815999031, 'timestamp': '2025-10-01 04:46:57.439662', 'step': 21327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:57.492805', 'step': 21327, 'epoch': 3} {'type': 'loss', 'content': 0.042024996131658554, 'timestamp': '2025-10-01 04:46:57.498775', 'step': 21328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:57.558973', 'step': 21328, 'epoch': 3} {'type': 'loss', 'content': 0.07279053330421448, 'timestamp': '2025-10-01 04:46:57.561400', 'step': 21329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:57.632356', 'step': 21329, 'epoch': 3} {'type': 'loss', 'content': 0.07021098583936691, 'timestamp': '2025-10-01 04:46:57.641052', 'step': 21330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:57.695069', 'step': 21330, 'epoch': 3} {'type': 'loss', 'content': 0.04139859601855278, 'timestamp': '2025-10-01 04:46:57.697190', 'step': 21331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:57.751869', 'step': 21331, 'epoch': 3} {'type': 'loss', 'content': 0.035024095326662064, 'timestamp': '2025-10-01 04:46:57.757700', 'step': 21332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:46:57.812096', 'step': 21332, 'epoch': 3} {'type': 'loss', 'content': 0.06861929595470428, 'timestamp': '2025-10-01 04:46:57.814298', 'step': 21333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:57.868784', 'step': 21333, 'epoch': 3} {'type': 'loss', 'content': 0.11633837968111038, 'timestamp': '2025-10-01 04:46:57.871608', 'step': 21334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:57.925764', 'step': 21334, 'epoch': 3} {'type': 'loss', 'content': 0.07205283641815186, 'timestamp': '2025-10-01 04:46:57.928281', 'step': 21335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:57.981990', 'step': 21335, 'epoch': 3} {'type': 'loss', 'content': 0.09423206746578217, 'timestamp': '2025-10-01 04:46:57.988295', 'step': 21336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:58.041400', 'step': 21336, 'epoch': 3} {'type': 'loss', 'content': 0.05180133506655693, 'timestamp': '2025-10-01 04:46:58.043406', 'step': 21337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:58.097938', 'step': 21337, 'epoch': 3} {'type': 'loss', 'content': 0.02211100421845913, 'timestamp': '2025-10-01 04:46:58.100093', 'step': 21338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:58.153946', 'step': 21338, 'epoch': 3} {'type': 'loss', 'content': 0.11663556098937988, 'timestamp': '2025-10-01 04:46:58.156175', 'step': 21339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:58.210077', 'step': 21339, 'epoch': 3} {'type': 'loss', 'content': 0.08401080220937729, 'timestamp': '2025-10-01 04:46:58.215990', 'step': 21340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:58.283307', 'step': 21340, 'epoch': 3} {'type': 'loss', 'content': 0.06300465762615204, 'timestamp': '2025-10-01 04:46:58.296124', 'step': 21341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:58.353317', 'step': 21341, 'epoch': 3} {'type': 'loss', 'content': 0.13274401426315308, 'timestamp': '2025-10-01 04:46:58.355515', 'step': 21342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:58.420938', 'step': 21342, 'epoch': 3} {'type': 'loss', 'content': 0.06470803171396255, 'timestamp': '2025-10-01 04:46:58.423087', 'step': 21343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:46:58.489575', 'step': 21343, 'epoch': 3} {'type': 'loss', 'content': 0.0789654403924942, 'timestamp': '2025-10-01 04:46:58.506914', 'step': 21344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:58.559604', 'step': 21344, 'epoch': 3} {'type': 'loss', 'content': 0.0428292378783226, 'timestamp': '2025-10-01 04:46:58.561758', 'step': 21345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:58.615553', 'step': 21345, 'epoch': 3} {'type': 'loss', 'content': 0.03353508934378624, 'timestamp': '2025-10-01 04:46:58.618325', 'step': 21346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:58.671946', 'step': 21346, 'epoch': 3} {'type': 'loss', 'content': 0.06361249834299088, 'timestamp': '2025-10-01 04:46:58.674958', 'step': 21347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:46:58.737893', 'step': 21347, 'epoch': 3} {'type': 'loss', 'content': 0.09661740809679031, 'timestamp': '2025-10-01 04:46:58.743816', 'step': 21348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:58.804579', 'step': 21348, 'epoch': 3} {'type': 'loss', 'content': 0.06280148774385452, 'timestamp': '2025-10-01 04:46:58.806577', 'step': 21349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:46:58.860655', 'step': 21349, 'epoch': 3} {'type': 'loss', 'content': 0.11374069005250931, 'timestamp': '2025-10-01 04:46:58.863038', 'step': 21350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:58.917136', 'step': 21350, 'epoch': 3} {'type': 'loss', 'content': 0.08472616225481033, 'timestamp': '2025-10-01 04:46:58.920268', 'step': 21351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:58.974230', 'step': 21351, 'epoch': 3} {'type': 'loss', 'content': 0.15856750309467316, 'timestamp': '2025-10-01 04:46:58.980394', 'step': 21352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:59.034035', 'step': 21352, 'epoch': 3} {'type': 'loss', 'content': 0.06248607113957405, 'timestamp': '2025-10-01 04:46:59.044879', 'step': 21353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:59.099804', 'step': 21353, 'epoch': 3} {'type': 'loss', 'content': 0.03473812714219093, 'timestamp': '2025-10-01 04:46:59.102163', 'step': 21354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:59.156318', 'step': 21354, 'epoch': 3} {'type': 'loss', 'content': 0.11276791989803314, 'timestamp': '2025-10-01 04:46:59.158616', 'step': 21355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:59.212282', 'step': 21355, 'epoch': 3} {'type': 'loss', 'content': 0.12976357340812683, 'timestamp': '2025-10-01 04:46:59.224456', 'step': 21356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:59.281899', 'step': 21356, 'epoch': 3} {'type': 'loss', 'content': 0.1089874729514122, 'timestamp': '2025-10-01 04:46:59.284212', 'step': 21357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:59.345284', 'step': 21357, 'epoch': 3} {'type': 'loss', 'content': 0.06461920589208603, 'timestamp': '2025-10-01 04:46:59.347416', 'step': 21358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:59.406213', 'step': 21358, 'epoch': 3} {'type': 'loss', 'content': 0.1221696138381958, 'timestamp': '2025-10-01 04:46:59.408494', 'step': 21359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:59.463202', 'step': 21359, 'epoch': 3} {'type': 'loss', 'content': 0.02049008384346962, 'timestamp': '2025-10-01 04:46:59.468859', 'step': 21360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:59.522388', 'step': 21360, 'epoch': 3} {'type': 'loss', 'content': 0.07658788561820984, 'timestamp': '2025-10-01 04:46:59.524646', 'step': 21361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:59.584505', 'step': 21361, 'epoch': 3} {'type': 'loss', 'content': 0.08842772990465164, 'timestamp': '2025-10-01 04:46:59.596193', 'step': 21362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:59.650136', 'step': 21362, 'epoch': 3} {'type': 'loss', 'content': 0.07456891983747482, 'timestamp': '2025-10-01 04:46:59.653052', 'step': 21363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:59.706899', 'step': 21363, 'epoch': 3} {'type': 'loss', 'content': 0.09151143580675125, 'timestamp': '2025-10-01 04:46:59.712834', 'step': 21364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:59.765602', 'step': 21364, 'epoch': 3} {'type': 'loss', 'content': 0.09150945395231247, 'timestamp': '2025-10-01 04:46:59.769198', 'step': 21365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:46:59.822914', 'step': 21365, 'epoch': 3} {'type': 'loss', 'content': 0.12351901829242706, 'timestamp': '2025-10-01 04:46:59.825136', 'step': 21366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:46:59.878817', 'step': 21366, 'epoch': 3} {'type': 'loss', 'content': 0.1296890676021576, 'timestamp': '2025-10-01 04:46:59.880983', 'step': 21367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:46:59.936827', 'step': 21367, 'epoch': 3} {'type': 'loss', 'content': 0.13796329498291016, 'timestamp': '2025-10-01 04:46:59.942696', 'step': 21368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:00.014800', 'step': 21368, 'epoch': 3} {'type': 'loss', 'content': 0.05178060382604599, 'timestamp': '2025-10-01 04:47:00.017548', 'step': 21369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:47:00.071890', 'step': 21369, 'epoch': 3} {'type': 'loss', 'content': 0.04675440862774849, 'timestamp': '2025-10-01 04:47:00.074061', 'step': 21370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:00.128009', 'step': 21370, 'epoch': 3} {'type': 'loss', 'content': 0.060617636889219284, 'timestamp': '2025-10-01 04:47:00.130132', 'step': 21371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:00.183871', 'step': 21371, 'epoch': 3} {'type': 'loss', 'content': 0.06579160690307617, 'timestamp': '2025-10-01 04:47:00.189531', 'step': 21372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:00.242553', 'step': 21372, 'epoch': 3} {'type': 'loss', 'content': 0.13326741755008698, 'timestamp': '2025-10-01 04:47:00.244692', 'step': 21373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:00.298459', 'step': 21373, 'epoch': 3} {'type': 'loss', 'content': 0.08971461653709412, 'timestamp': '2025-10-01 04:47:00.300667', 'step': 21374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:47:00.354537', 'step': 21374, 'epoch': 3} {'type': 'loss', 'content': 0.07195324450731277, 'timestamp': '2025-10-01 04:47:00.356749', 'step': 21375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:47:00.411877', 'step': 21375, 'epoch': 3} {'type': 'loss', 'content': 0.10131347924470901, 'timestamp': '2025-10-01 04:47:00.417536', 'step': 21376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:00.470582', 'step': 21376, 'epoch': 3} {'type': 'loss', 'content': 0.0852656438946724, 'timestamp': '2025-10-01 04:47:00.473029', 'step': 21377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:00.527329', 'step': 21377, 'epoch': 3} {'type': 'loss', 'content': 0.0568983219563961, 'timestamp': '2025-10-01 04:47:00.529523', 'step': 21378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:00.583239', 'step': 21378, 'epoch': 3} {'type': 'loss', 'content': 0.1392865926027298, 'timestamp': '2025-10-01 04:47:00.585571', 'step': 21379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:00.639466', 'step': 21379, 'epoch': 3} {'type': 'loss', 'content': 0.06477640569210052, 'timestamp': '2025-10-01 04:47:00.645283', 'step': 21380, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:00.708884', 'step': 21380, 'epoch': 3} {'type': 'loss', 'content': 0.05796915665268898, 'timestamp': '2025-10-01 04:47:00.711099', 'step': 21381, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:47:00.764710', 'step': 21381, 'epoch': 3} {'type': 'loss', 'content': 0.05230041965842247, 'timestamp': '2025-10-01 04:47:00.767562', 'step': 21382, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:47:00.822427', 'step': 21382, 'epoch': 3} {'type': 'loss', 'content': 0.08585746586322784, 'timestamp': '2025-10-01 04:47:00.824870', 'step': 21383, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:00.878476', 'step': 21383, 'epoch': 3} {'type': 'loss', 'content': 0.12614497542381287, 'timestamp': '2025-10-01 04:47:00.884861', 'step': 21384, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:00.940311', 'step': 21384, 'epoch': 3} {'type': 'loss', 'content': 0.13261368870735168, 'timestamp': '2025-10-01 04:47:00.948221', 'step': 21385, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:01.003024', 'step': 21385, 'epoch': 3} {'type': 'loss', 'content': 0.023114778101444244, 'timestamp': '2025-10-01 04:47:01.005473', 'step': 21386, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:01.063263', 'step': 21386, 'epoch': 3} {'type': 'loss', 'content': 0.05422781780362129, 'timestamp': '2025-10-01 04:47:01.065778', 'step': 21387, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:01.120694', 'step': 21387, 'epoch': 3} {'type': 'loss', 'content': 0.1052471399307251, 'timestamp': '2025-10-01 04:47:01.126758', 'step': 21388, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:01.196023', 'step': 21388, 'epoch': 3} {'type': 'loss', 'content': 0.06183931231498718, 'timestamp': '2025-10-01 04:47:01.199168', 'step': 21389, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:01.254963', 'step': 21389, 'epoch': 3} {'type': 'loss', 'content': 0.02514021284878254, 'timestamp': '2025-10-01 04:47:01.257695', 'step': 21390, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:01.313014', 'step': 21390, 'epoch': 3} {'type': 'loss', 'content': 0.07401963323354721, 'timestamp': '2025-10-01 04:47:01.315662', 'step': 21391, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:01.370005', 'step': 21391, 'epoch': 3} {'type': 'loss', 'content': 0.05404273048043251, 'timestamp': '2025-10-01 04:47:01.375691', 'step': 21392, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:01.429131', 'step': 21392, 'epoch': 3} {'type': 'loss', 'content': 0.111317478120327, 'timestamp': '2025-10-01 04:47:01.431529', 'step': 21393, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:01.485497', 'step': 21393, 'epoch': 3} {'type': 'loss', 'content': 0.06424833834171295, 'timestamp': '2025-10-01 04:47:01.488077', 'step': 21394, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:47:01.543101', 'step': 21394, 'epoch': 3} {'type': 'loss', 'content': 0.07763607054948807, 'timestamp': '2025-10-01 04:47:01.554548', 'step': 21395, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:01.609174', 'step': 21395, 'epoch': 3} {'type': 'loss', 'content': 0.0602683424949646, 'timestamp': '2025-10-01 04:47:01.615405', 'step': 21396, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:01.670967', 'step': 21396, 'epoch': 3} {'type': 'loss', 'content': 0.09605330973863602, 'timestamp': '2025-10-01 04:47:01.673616', 'step': 21397, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:01.727954', 'step': 21397, 'epoch': 3} {'type': 'loss', 'content': 0.08879224956035614, 'timestamp': '2025-10-01 04:47:01.730628', 'step': 21398, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:01.784552', 'step': 21398, 'epoch': 3} {'type': 'loss', 'content': 0.09646665304899216, 'timestamp': '2025-10-01 04:47:01.786948', 'step': 21399, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:01.842155', 'step': 21399, 'epoch': 3} {'type': 'loss', 'content': 0.08797457069158554, 'timestamp': '2025-10-01 04:47:01.848500', 'step': 21400, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:01.902173', 'step': 21400, 'epoch': 3} {'type': 'loss', 'content': 0.09569685906171799, 'timestamp': '2025-10-01 04:47:01.904490', 'step': 21401, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:01.963973', 'step': 21401, 'epoch': 3} {'type': 'loss', 'content': 0.054320044815540314, 'timestamp': '2025-10-01 04:47:01.966199', 'step': 21402, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:02.021238', 'step': 21402, 'epoch': 3} {'type': 'loss', 'content': 0.08648157119750977, 'timestamp': '2025-10-01 04:47:02.023698', 'step': 21403, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:02.077681', 'step': 21403, 'epoch': 3} {'type': 'loss', 'content': 0.08757518231868744, 'timestamp': '2025-10-01 04:47:02.083687', 'step': 21404, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:02.138123', 'step': 21404, 'epoch': 3} {'type': 'loss', 'content': 0.09986400604248047, 'timestamp': '2025-10-01 04:47:02.140580', 'step': 21405, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:47:02.195119', 'step': 21405, 'epoch': 3} {'type': 'loss', 'content': 0.07951890677213669, 'timestamp': '2025-10-01 04:47:02.197313', 'step': 21406, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:02.251151', 'step': 21406, 'epoch': 3} {'type': 'loss', 'content': 0.0794718936085701, 'timestamp': '2025-10-01 04:47:02.253648', 'step': 21407, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:47:02.307741', 'step': 21407, 'epoch': 3} {'type': 'loss', 'content': 0.0659528449177742, 'timestamp': '2025-10-01 04:47:02.313480', 'step': 21408, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:02.366158', 'step': 21408, 'epoch': 3} {'type': 'loss', 'content': 0.058732349425554276, 'timestamp': '2025-10-01 04:47:02.368311', 'step': 21409, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:02.421705', 'step': 21409, 'epoch': 3} {'type': 'loss', 'content': 0.07939743995666504, 'timestamp': '2025-10-01 04:47:02.431449', 'step': 21410, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:02.485801', 'step': 21410, 'epoch': 3} {'type': 'loss', 'content': 0.07069332152605057, 'timestamp': '2025-10-01 04:47:02.488120', 'step': 21411, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:47:02.541714', 'step': 21411, 'epoch': 3} {'type': 'loss', 'content': 0.08619875460863113, 'timestamp': '2025-10-01 04:47:02.547545', 'step': 21412, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:02.600432', 'step': 21412, 'epoch': 3} {'type': 'loss', 'content': 0.142762690782547, 'timestamp': '2025-10-01 04:47:02.602689', 'step': 21413, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:02.656010', 'step': 21413, 'epoch': 3} {'type': 'loss', 'content': 0.06095188111066818, 'timestamp': '2025-10-01 04:47:02.658162', 'step': 21414, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:02.711379', 'step': 21414, 'epoch': 3} {'type': 'loss', 'content': 0.06952347606420517, 'timestamp': '2025-10-01 04:47:02.713659', 'step': 21415, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:02.767367', 'step': 21415, 'epoch': 3} {'type': 'loss', 'content': 0.07262694835662842, 'timestamp': '2025-10-01 04:47:02.773261', 'step': 21416, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:02.826301', 'step': 21416, 'epoch': 3} {'type': 'loss', 'content': 0.04419897496700287, 'timestamp': '2025-10-01 04:47:02.828482', 'step': 21417, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:02.883790', 'step': 21417, 'epoch': 3} {'type': 'loss', 'content': 0.07363222539424896, 'timestamp': '2025-10-01 04:47:02.887701', 'step': 21418, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:02.942127', 'step': 21418, 'epoch': 3} {'type': 'loss', 'content': 0.120966337621212, 'timestamp': '2025-10-01 04:47:02.944067', 'step': 21419, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:47:03.004135', 'step': 21419, 'epoch': 3} {'type': 'loss', 'content': 0.1441735476255417, 'timestamp': '2025-10-01 04:47:03.010260', 'step': 21420, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:03.063328', 'step': 21420, 'epoch': 3} {'type': 'loss', 'content': 0.12067890912294388, 'timestamp': '2025-10-01 04:47:03.065435', 'step': 21421, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:47:03.118752', 'step': 21421, 'epoch': 3} {'type': 'loss', 'content': 0.10024403035640717, 'timestamp': '2025-10-01 04:47:03.121040', 'step': 21422, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:03.174260', 'step': 21422, 'epoch': 3} {'type': 'loss', 'content': 0.10050362348556519, 'timestamp': '2025-10-01 04:47:03.176449', 'step': 21423, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:03.229927', 'step': 21423, 'epoch': 3} {'type': 'loss', 'content': 0.08950237929821014, 'timestamp': '2025-10-01 04:47:03.235769', 'step': 21424, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:03.288556', 'step': 21424, 'epoch': 3} {'type': 'loss', 'content': 0.05490169674158096, 'timestamp': '2025-10-01 04:47:03.291009', 'step': 21425, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:03.353482', 'step': 21425, 'epoch': 3} {'type': 'loss', 'content': 0.056929364800453186, 'timestamp': '2025-10-01 04:47:03.355806', 'step': 21426, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:03.409331', 'step': 21426, 'epoch': 3} {'type': 'loss', 'content': 0.08339911699295044, 'timestamp': '2025-10-01 04:47:03.411534', 'step': 21427, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:03.471877', 'step': 21427, 'epoch': 3} {'type': 'loss', 'content': 0.10032787173986435, 'timestamp': '2025-10-01 04:47:03.477555', 'step': 21428, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:03.530179', 'step': 21428, 'epoch': 3} {'type': 'loss', 'content': 0.12155143171548843, 'timestamp': '2025-10-01 04:47:03.532330', 'step': 21429, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:03.585522', 'step': 21429, 'epoch': 3} {'type': 'loss', 'content': 0.05431121215224266, 'timestamp': '2025-10-01 04:47:03.587773', 'step': 21430, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:03.642336', 'step': 21430, 'epoch': 3} {'type': 'loss', 'content': 0.08188942074775696, 'timestamp': '2025-10-01 04:47:03.644573', 'step': 21431, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:03.697791', 'step': 21431, 'epoch': 3} {'type': 'loss', 'content': 0.10901989042758942, 'timestamp': '2025-10-01 04:47:03.703446', 'step': 21432, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:03.755843', 'step': 21432, 'epoch': 3} {'type': 'loss', 'content': 0.04012051224708557, 'timestamp': '2025-10-01 04:47:03.758201', 'step': 21433, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:03.812181', 'step': 21433, 'epoch': 3} {'type': 'loss', 'content': 0.09115457534790039, 'timestamp': '2025-10-01 04:47:03.814457', 'step': 21434, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:03.867561', 'step': 21434, 'epoch': 3} {'type': 'loss', 'content': 0.07552533596754074, 'timestamp': '2025-10-01 04:47:03.869698', 'step': 21435, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:03.933631', 'step': 21435, 'epoch': 3} {'type': 'loss', 'content': 0.12980405986309052, 'timestamp': '2025-10-01 04:47:03.939421', 'step': 21436, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-01 04:47:16.895767', 'step': 21436, 'epoch': 3} {'type': 'pplx', 'content': 8915.62217113265, 'timestamp': '2025-10-01 04:47:16.898734', 'step': 21436, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:16.952187', 'step': 21436, 'epoch': 3} {'type': 'loss', 'content': 0.047418124973773956, 'timestamp': '2025-10-01 04:47:16.954318', 'step': 21437, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:17.008342', 'step': 21437, 'epoch': 3} {'type': 'loss', 'content': 0.11067318171262741, 'timestamp': '2025-10-01 04:47:17.010468', 'step': 21438, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:17.068239', 'step': 21438, 'epoch': 3} {'type': 'loss', 'content': 0.14377260208129883, 'timestamp': '2025-10-01 04:47:17.070886', 'step': 21439, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:17.124595', 'step': 21439, 'epoch': 3} {'type': 'loss', 'content': 0.09017954021692276, 'timestamp': '2025-10-01 04:47:17.130908', 'step': 21440, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:17.185478', 'step': 21440, 'epoch': 3} {'type': 'loss', 'content': 0.0850776955485344, 'timestamp': '2025-10-01 04:47:17.192302', 'step': 21441, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:17.248221', 'step': 21441, 'epoch': 3} {'type': 'loss', 'content': 0.09440833330154419, 'timestamp': '2025-10-01 04:47:17.250667', 'step': 21442, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:17.306412', 'step': 21442, 'epoch': 3} {'type': 'loss', 'content': 0.08432163298130035, 'timestamp': '2025-10-01 04:47:17.308744', 'step': 21443, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:17.363127', 'step': 21443, 'epoch': 3} {'type': 'loss', 'content': 0.07673761993646622, 'timestamp': '2025-10-01 04:47:17.369264', 'step': 21444, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:17.422897', 'step': 21444, 'epoch': 3} {'type': 'loss', 'content': 0.06789933890104294, 'timestamp': '2025-10-01 04:47:17.425107', 'step': 21445, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:17.482995', 'step': 21445, 'epoch': 3} {'type': 'loss', 'content': 0.04901876673102379, 'timestamp': '2025-10-01 04:47:17.485926', 'step': 21446, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:17.543925', 'step': 21446, 'epoch': 3} {'type': 'loss', 'content': 0.15771949291229248, 'timestamp': '2025-10-01 04:47:17.553147', 'step': 21447, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:17.610296', 'step': 21447, 'epoch': 3} {'type': 'loss', 'content': 0.06672497093677521, 'timestamp': '2025-10-01 04:47:17.616628', 'step': 21448, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:17.675503', 'step': 21448, 'epoch': 3} {'type': 'loss', 'content': 0.06304017454385757, 'timestamp': '2025-10-01 04:47:17.678031', 'step': 21449, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:17.734029', 'step': 21449, 'epoch': 3} {'type': 'loss', 'content': 0.1617252677679062, 'timestamp': '2025-10-01 04:47:17.736100', 'step': 21450, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:17.790230', 'step': 21450, 'epoch': 3} {'type': 'loss', 'content': 0.1024068295955658, 'timestamp': '2025-10-01 04:47:17.792757', 'step': 21451, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:17.847999', 'step': 21451, 'epoch': 3} {'type': 'loss', 'content': 0.06653908640146255, 'timestamp': '2025-10-01 04:47:17.855426', 'step': 21452, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:47:17.913166', 'step': 21452, 'epoch': 3} {'type': 'loss', 'content': 0.08403674513101578, 'timestamp': '2025-10-01 04:47:17.915406', 'step': 21453, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:17.981005', 'step': 21453, 'epoch': 3} {'type': 'loss', 'content': 0.14326804876327515, 'timestamp': '2025-10-01 04:47:17.983239', 'step': 21454, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:18.037630', 'step': 21454, 'epoch': 3} {'type': 'loss', 'content': 0.02414194494485855, 'timestamp': '2025-10-01 04:47:18.040046', 'step': 21455, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:18.096036', 'step': 21455, 'epoch': 3} {'type': 'loss', 'content': 0.0545828714966774, 'timestamp': '2025-10-01 04:47:18.102548', 'step': 21456, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:18.156088', 'step': 21456, 'epoch': 3} {'type': 'loss', 'content': 0.019613085314631462, 'timestamp': '2025-10-01 04:47:18.166391', 'step': 21457, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:18.219965', 'step': 21457, 'epoch': 3} {'type': 'loss', 'content': 0.08009025454521179, 'timestamp': '2025-10-01 04:47:18.222523', 'step': 21458, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:18.276066', 'step': 21458, 'epoch': 3} {'type': 'loss', 'content': 0.11520998924970627, 'timestamp': '2025-10-01 04:47:18.279316', 'step': 21459, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:18.334859', 'step': 21459, 'epoch': 3} {'type': 'loss', 'content': 0.07692845165729523, 'timestamp': '2025-10-01 04:47:18.341194', 'step': 21460, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:18.394913', 'step': 21460, 'epoch': 3} {'type': 'loss', 'content': 0.1050848588347435, 'timestamp': '2025-10-01 04:47:18.397539', 'step': 21461, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:18.451339', 'step': 21461, 'epoch': 3} {'type': 'loss', 'content': 0.06807562708854675, 'timestamp': '2025-10-01 04:47:18.461850', 'step': 21462, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:47:18.516623', 'step': 21462, 'epoch': 3} {'type': 'loss', 'content': 0.06812203675508499, 'timestamp': '2025-10-01 04:47:18.524332', 'step': 21463, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:18.580045', 'step': 21463, 'epoch': 3} {'type': 'loss', 'content': 0.08116595447063446, 'timestamp': '2025-10-01 04:47:18.586382', 'step': 21464, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:18.640082', 'step': 21464, 'epoch': 3} {'type': 'loss', 'content': 0.03560042008757591, 'timestamp': '2025-10-01 04:47:18.642354', 'step': 21465, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:47:18.696329', 'step': 21465, 'epoch': 3} {'type': 'loss', 'content': 0.03681133687496185, 'timestamp': '2025-10-01 04:47:18.698528', 'step': 21466, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:18.751696', 'step': 21466, 'epoch': 3} {'type': 'loss', 'content': 0.025704775005578995, 'timestamp': '2025-10-01 04:47:18.754019', 'step': 21467, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:18.807167', 'step': 21467, 'epoch': 3} {'type': 'loss', 'content': 0.06599408388137817, 'timestamp': '2025-10-01 04:47:18.819150', 'step': 21468, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:18.872117', 'step': 21468, 'epoch': 3} {'type': 'loss', 'content': 0.10140740871429443, 'timestamp': '2025-10-01 04:47:18.874721', 'step': 21469, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:18.929443', 'step': 21469, 'epoch': 3} {'type': 'loss', 'content': 0.06823284178972244, 'timestamp': '2025-10-01 04:47:18.931671', 'step': 21470, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:18.984855', 'step': 21470, 'epoch': 3} {'type': 'loss', 'content': 0.13699229061603546, 'timestamp': '2025-10-01 04:47:18.987011', 'step': 21471, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:19.040010', 'step': 21471, 'epoch': 3} {'type': 'loss', 'content': 0.08009252697229385, 'timestamp': '2025-10-01 04:47:19.046611', 'step': 21472, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:19.099189', 'step': 21472, 'epoch': 3} {'type': 'loss', 'content': 0.11415869742631912, 'timestamp': '2025-10-01 04:47:19.101310', 'step': 21473, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:19.154135', 'step': 21473, 'epoch': 3} {'type': 'loss', 'content': 0.021541744470596313, 'timestamp': '2025-10-01 04:47:19.157324', 'step': 21474, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:19.210844', 'step': 21474, 'epoch': 3} {'type': 'loss', 'content': 0.08361007273197174, 'timestamp': '2025-10-01 04:47:19.212919', 'step': 21475, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:47:19.270862', 'step': 21475, 'epoch': 3} {'type': 'loss', 'content': 0.15385597944259644, 'timestamp': '2025-10-01 04:47:19.276600', 'step': 21476, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:47:19.329623', 'step': 21476, 'epoch': 3} {'type': 'loss', 'content': 0.08447488397359848, 'timestamp': '2025-10-01 04:47:19.332036', 'step': 21477, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:47:19.385699', 'step': 21477, 'epoch': 3} {'type': 'loss', 'content': 0.06624355912208557, 'timestamp': '2025-10-01 04:47:19.387903', 'step': 21478, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:19.441841', 'step': 21478, 'epoch': 3} {'type': 'loss', 'content': 0.08726587891578674, 'timestamp': '2025-10-01 04:47:19.443925', 'step': 21479, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:19.497732', 'step': 21479, 'epoch': 3} {'type': 'loss', 'content': 0.07524275779724121, 'timestamp': '2025-10-01 04:47:19.503403', 'step': 21480, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:19.563294', 'step': 21480, 'epoch': 3} {'type': 'loss', 'content': 0.08607981353998184, 'timestamp': '2025-10-01 04:47:19.565424', 'step': 21481, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:19.624412', 'step': 21481, 'epoch': 3} {'type': 'loss', 'content': 0.08739437162876129, 'timestamp': '2025-10-01 04:47:19.626747', 'step': 21482, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:19.679943', 'step': 21482, 'epoch': 3} {'type': 'loss', 'content': 0.12899868190288544, 'timestamp': '2025-10-01 04:47:19.682040', 'step': 21483, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:19.735048', 'step': 21483, 'epoch': 3} {'type': 'loss', 'content': 0.11099598556756973, 'timestamp': '2025-10-01 04:47:19.740770', 'step': 21484, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:47:19.793332', 'step': 21484, 'epoch': 3} {'type': 'loss', 'content': 0.06871336698532104, 'timestamp': '2025-10-01 04:47:19.795414', 'step': 21485, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:19.850194', 'step': 21485, 'epoch': 3} {'type': 'loss', 'content': 0.09848972409963608, 'timestamp': '2025-10-01 04:47:19.852258', 'step': 21486, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:19.905821', 'step': 21486, 'epoch': 3} {'type': 'loss', 'content': 0.043164920061826706, 'timestamp': '2025-10-01 04:47:19.908122', 'step': 21487, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:19.961552', 'step': 21487, 'epoch': 3} {'type': 'loss', 'content': 0.0839586928486824, 'timestamp': '2025-10-01 04:47:19.967602', 'step': 21488, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:20.021235', 'step': 21488, 'epoch': 3} {'type': 'loss', 'content': 0.11130815744400024, 'timestamp': '2025-10-01 04:47:20.023446', 'step': 21489, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:47:20.078689', 'step': 21489, 'epoch': 3} {'type': 'loss', 'content': 0.11328289657831192, 'timestamp': '2025-10-01 04:47:20.080899', 'step': 21490, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:47:20.134622', 'step': 21490, 'epoch': 3} {'type': 'loss', 'content': 0.08477067202329636, 'timestamp': '2025-10-01 04:47:20.136892', 'step': 21491, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:47:20.193756', 'step': 21491, 'epoch': 3} {'type': 'loss', 'content': 0.08120309561491013, 'timestamp': '2025-10-01 04:47:20.200376', 'step': 21492, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:20.254282', 'step': 21492, 'epoch': 3} {'type': 'loss', 'content': 0.0642179623246193, 'timestamp': '2025-10-01 04:47:20.256390', 'step': 21493, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:20.313995', 'step': 21493, 'epoch': 3} {'type': 'loss', 'content': 0.12513308227062225, 'timestamp': '2025-10-01 04:47:20.316124', 'step': 21494, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:20.369998', 'step': 21494, 'epoch': 3} {'type': 'loss', 'content': 0.1258241981267929, 'timestamp': '2025-10-01 04:47:20.372193', 'step': 21495, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:47:20.426313', 'step': 21495, 'epoch': 3} {'type': 'loss', 'content': 0.07617548853158951, 'timestamp': '2025-10-01 04:47:20.432595', 'step': 21496, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:47:20.485977', 'step': 21496, 'epoch': 3} {'type': 'loss', 'content': 0.0798662081360817, 'timestamp': '2025-10-01 04:47:20.487945', 'step': 21497, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:20.541233', 'step': 21497, 'epoch': 3} {'type': 'loss', 'content': 0.08189056813716888, 'timestamp': '2025-10-01 04:47:20.543445', 'step': 21498, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:20.597435', 'step': 21498, 'epoch': 3} {'type': 'loss', 'content': 0.06794384121894836, 'timestamp': '2025-10-01 04:47:20.599570', 'step': 21499, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:20.652917', 'step': 21499, 'epoch': 3} {'type': 'loss', 'content': 0.14480841159820557, 'timestamp': '2025-10-01 04:47:20.658930', 'step': 21500, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 21500', 'timestamp': '2025-10-01 04:47:21.054632', 'step': 21500, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:21.109138', 'step': 21500, 'epoch': 3} {'type': 'loss', 'content': 0.11618250608444214, 'timestamp': '2025-10-01 04:47:21.111219', 'step': 21501, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:21.165357', 'step': 21501, 'epoch': 3} {'type': 'loss', 'content': 0.05747209116816521, 'timestamp': '2025-10-01 04:47:21.167478', 'step': 21502, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:21.235894', 'step': 21502, 'epoch': 3} {'type': 'loss', 'content': 0.04959087073802948, 'timestamp': '2025-10-01 04:47:21.241563', 'step': 21503, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:21.296584', 'step': 21503, 'epoch': 3} {'type': 'loss', 'content': 0.1224040538072586, 'timestamp': '2025-10-01 04:47:21.303190', 'step': 21504, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:47:21.362425', 'step': 21504, 'epoch': 3} {'type': 'loss', 'content': 0.1603415608406067, 'timestamp': '2025-10-01 04:47:21.364869', 'step': 21505, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:21.418533', 'step': 21505, 'epoch': 3} {'type': 'loss', 'content': 0.038801487535238266, 'timestamp': '2025-10-01 04:47:21.420811', 'step': 21506, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:47:21.478330', 'step': 21506, 'epoch': 3} {'type': 'loss', 'content': 0.06425371021032333, 'timestamp': '2025-10-01 04:47:21.482195', 'step': 21507, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:21.536709', 'step': 21507, 'epoch': 3} {'type': 'loss', 'content': 0.09517587721347809, 'timestamp': '2025-10-01 04:47:21.542827', 'step': 21508, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:21.595607', 'step': 21508, 'epoch': 3} {'type': 'loss', 'content': 0.09334929287433624, 'timestamp': '2025-10-01 04:47:21.599024', 'step': 21509, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:47:21.663190', 'step': 21509, 'epoch': 3} {'type': 'loss', 'content': 0.08184060454368591, 'timestamp': '2025-10-01 04:47:21.665443', 'step': 21510, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:47:21.728309', 'step': 21510, 'epoch': 3} {'type': 'loss', 'content': 0.08898674696683884, 'timestamp': '2025-10-01 04:47:21.730477', 'step': 21511, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:47:21.784020', 'step': 21511, 'epoch': 3} {'type': 'loss', 'content': 0.1383509784936905, 'timestamp': '2025-10-01 04:47:21.789899', 'step': 21512, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:21.842714', 'step': 21512, 'epoch': 3} {'type': 'loss', 'content': 0.07983476668596268, 'timestamp': '2025-10-01 04:47:21.844780', 'step': 21513, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:21.898921', 'step': 21513, 'epoch': 3} {'type': 'loss', 'content': 0.07027336955070496, 'timestamp': '2025-10-01 04:47:21.912932', 'step': 21514, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:21.977165', 'step': 21514, 'epoch': 3} {'type': 'loss', 'content': 0.057076334953308105, 'timestamp': '2025-10-01 04:47:21.979097', 'step': 21515, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:47:22.037593', 'step': 21515, 'epoch': 3} {'type': 'loss', 'content': 0.06648679077625275, 'timestamp': '2025-10-01 04:47:22.043779', 'step': 21516, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:22.100469', 'step': 21516, 'epoch': 3} {'type': 'loss', 'content': 0.08710359036922455, 'timestamp': '2025-10-01 04:47:22.104298', 'step': 21517, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:22.159439', 'step': 21517, 'epoch': 3} {'type': 'loss', 'content': 0.027272038161754608, 'timestamp': '2025-10-01 04:47:22.161604', 'step': 21518, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:47:22.216622', 'step': 21518, 'epoch': 3} {'type': 'loss', 'content': 0.023707544431090355, 'timestamp': '2025-10-01 04:47:22.218747', 'step': 21519, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:22.273607', 'step': 21519, 'epoch': 3} {'type': 'loss', 'content': 0.058024171739816666, 'timestamp': '2025-10-01 04:47:22.280016', 'step': 21520, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:22.338791', 'step': 21520, 'epoch': 3} {'type': 'loss', 'content': 0.037004534155130386, 'timestamp': '2025-10-01 04:47:22.341537', 'step': 21521, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:22.396580', 'step': 21521, 'epoch': 3} {'type': 'loss', 'content': 0.07240106165409088, 'timestamp': '2025-10-01 04:47:22.398732', 'step': 21522, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:47:22.452301', 'step': 21522, 'epoch': 3} {'type': 'loss', 'content': 0.12762416899204254, 'timestamp': '2025-10-01 04:47:22.456570', 'step': 21523, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:22.511699', 'step': 21523, 'epoch': 3} {'type': 'loss', 'content': 0.14317914843559265, 'timestamp': '2025-10-01 04:47:22.517897', 'step': 21524, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:22.571754', 'step': 21524, 'epoch': 3} {'type': 'loss', 'content': 0.027602359652519226, 'timestamp': '2025-10-01 04:47:22.574318', 'step': 21525, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:22.632843', 'step': 21525, 'epoch': 3} {'type': 'loss', 'content': 0.0566042922437191, 'timestamp': '2025-10-01 04:47:22.635092', 'step': 21526, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:47:22.691126', 'step': 21526, 'epoch': 3} {'type': 'loss', 'content': 0.05277225002646446, 'timestamp': '2025-10-01 04:47:22.693722', 'step': 21527, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:22.748883', 'step': 21527, 'epoch': 3} {'type': 'loss', 'content': 0.06704200059175491, 'timestamp': '2025-10-01 04:47:22.754863', 'step': 21528, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:22.807467', 'step': 21528, 'epoch': 3} {'type': 'loss', 'content': 0.03005257435142994, 'timestamp': '2025-10-01 04:47:22.809592', 'step': 21529, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:22.863556', 'step': 21529, 'epoch': 3} {'type': 'loss', 'content': 0.11621562391519547, 'timestamp': '2025-10-01 04:47:22.865641', 'step': 21530, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:22.920156', 'step': 21530, 'epoch': 3} {'type': 'loss', 'content': 0.04701666533946991, 'timestamp': '2025-10-01 04:47:22.922371', 'step': 21531, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:22.977731', 'step': 21531, 'epoch': 3} {'type': 'loss', 'content': 0.07407086342573166, 'timestamp': '2025-10-01 04:47:22.984469', 'step': 21532, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:23.038523', 'step': 21532, 'epoch': 3} {'type': 'loss', 'content': 0.08632906526327133, 'timestamp': '2025-10-01 04:47:23.043910', 'step': 21533, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:23.102427', 'step': 21533, 'epoch': 3} {'type': 'loss', 'content': 0.08912958204746246, 'timestamp': '2025-10-01 04:47:23.105002', 'step': 21534, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:23.162320', 'step': 21534, 'epoch': 3} {'type': 'loss', 'content': 0.05368085205554962, 'timestamp': '2025-10-01 04:47:23.169681', 'step': 21535, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:23.227826', 'step': 21535, 'epoch': 3} {'type': 'loss', 'content': 0.11070697754621506, 'timestamp': '2025-10-01 04:47:23.234747', 'step': 21536, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:23.290520', 'step': 21536, 'epoch': 3} {'type': 'loss', 'content': 0.07917217910289764, 'timestamp': '2025-10-01 04:47:23.292793', 'step': 21537, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:23.347544', 'step': 21537, 'epoch': 3} {'type': 'loss', 'content': 0.013642281293869019, 'timestamp': '2025-10-01 04:47:23.349707', 'step': 21538, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:23.404977', 'step': 21538, 'epoch': 3} {'type': 'loss', 'content': 0.05168043076992035, 'timestamp': '2025-10-01 04:47:23.407129', 'step': 21539, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:23.461570', 'step': 21539, 'epoch': 3} {'type': 'loss', 'content': 0.03645947203040123, 'timestamp': '2025-10-01 04:47:23.468751', 'step': 21540, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:23.525315', 'step': 21540, 'epoch': 3} {'type': 'loss', 'content': 0.07237475365400314, 'timestamp': '2025-10-01 04:47:23.527542', 'step': 21541, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:23.583594', 'step': 21541, 'epoch': 3} {'type': 'loss', 'content': 0.11304742842912674, 'timestamp': '2025-10-01 04:47:23.585808', 'step': 21542, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:23.640031', 'step': 21542, 'epoch': 3} {'type': 'loss', 'content': 0.10177181661128998, 'timestamp': '2025-10-01 04:47:23.642197', 'step': 21543, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:23.698147', 'step': 21543, 'epoch': 3} {'type': 'loss', 'content': 0.06628532707691193, 'timestamp': '2025-10-01 04:47:23.706257', 'step': 21544, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:23.768642', 'step': 21544, 'epoch': 3} {'type': 'loss', 'content': 0.13359799981117249, 'timestamp': '2025-10-01 04:47:23.770937', 'step': 21545, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:23.831797', 'step': 21545, 'epoch': 3} {'type': 'loss', 'content': 0.05796600505709648, 'timestamp': '2025-10-01 04:47:23.834183', 'step': 21546, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:23.890174', 'step': 21546, 'epoch': 3} {'type': 'loss', 'content': 0.06544030457735062, 'timestamp': '2025-10-01 04:47:23.892419', 'step': 21547, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:23.946198', 'step': 21547, 'epoch': 3} {'type': 'loss', 'content': 0.06452745199203491, 'timestamp': '2025-10-01 04:47:23.955084', 'step': 21548, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:24.008160', 'step': 21548, 'epoch': 3} {'type': 'loss', 'content': 0.17648945748806, 'timestamp': '2025-10-01 04:47:24.010600', 'step': 21549, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:24.064391', 'step': 21549, 'epoch': 3} {'type': 'loss', 'content': 0.07211674004793167, 'timestamp': '2025-10-01 04:47:24.066597', 'step': 21550, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:24.120281', 'step': 21550, 'epoch': 3} {'type': 'loss', 'content': 0.07742885500192642, 'timestamp': '2025-10-01 04:47:24.122941', 'step': 21551, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:24.176018', 'step': 21551, 'epoch': 3} {'type': 'loss', 'content': 0.05346457660198212, 'timestamp': '2025-10-01 04:47:24.181973', 'step': 21552, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:24.234498', 'step': 21552, 'epoch': 3} {'type': 'loss', 'content': 0.06880907714366913, 'timestamp': '2025-10-01 04:47:24.236667', 'step': 21553, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:24.292308', 'step': 21553, 'epoch': 3} {'type': 'loss', 'content': 0.05248934030532837, 'timestamp': '2025-10-01 04:47:24.294443', 'step': 21554, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:24.347779', 'step': 21554, 'epoch': 3} {'type': 'loss', 'content': 0.10567907989025116, 'timestamp': '2025-10-01 04:47:24.349939', 'step': 21555, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:24.403433', 'step': 21555, 'epoch': 3} {'type': 'loss', 'content': 0.0823432207107544, 'timestamp': '2025-10-01 04:47:24.409235', 'step': 21556, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:24.462333', 'step': 21556, 'epoch': 3} {'type': 'loss', 'content': 0.027378827333450317, 'timestamp': '2025-10-01 04:47:24.464416', 'step': 21557, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:24.517002', 'step': 21557, 'epoch': 3} {'type': 'loss', 'content': 0.06383608281612396, 'timestamp': '2025-10-01 04:47:24.519289', 'step': 21558, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:24.572217', 'step': 21558, 'epoch': 3} {'type': 'loss', 'content': 0.09557946771383286, 'timestamp': '2025-10-01 04:47:24.574343', 'step': 21559, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:24.630557', 'step': 21559, 'epoch': 3} {'type': 'loss', 'content': 0.15950733423233032, 'timestamp': '2025-10-01 04:47:24.636251', 'step': 21560, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:24.688598', 'step': 21560, 'epoch': 3} {'type': 'loss', 'content': 0.08190422505140305, 'timestamp': '2025-10-01 04:47:24.691344', 'step': 21561, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:24.745582', 'step': 21561, 'epoch': 3} {'type': 'loss', 'content': 0.02477589249610901, 'timestamp': '2025-10-01 04:47:24.747672', 'step': 21562, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:47:24.801474', 'step': 21562, 'epoch': 3} {'type': 'loss', 'content': 0.11250407248735428, 'timestamp': '2025-10-01 04:47:24.803725', 'step': 21563, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:24.857272', 'step': 21563, 'epoch': 3} {'type': 'loss', 'content': 0.09653162956237793, 'timestamp': '2025-10-01 04:47:24.863133', 'step': 21564, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:24.923122', 'step': 21564, 'epoch': 3} {'type': 'loss', 'content': 0.04549982771277428, 'timestamp': '2025-10-01 04:47:24.925235', 'step': 21565, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:24.979215', 'step': 21565, 'epoch': 3} {'type': 'loss', 'content': 0.02758743241429329, 'timestamp': '2025-10-01 04:47:24.981316', 'step': 21566, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:47:25.035424', 'step': 21566, 'epoch': 3} {'type': 'loss', 'content': 0.017595170065760612, 'timestamp': '2025-10-01 04:47:25.037582', 'step': 21567, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:25.092839', 'step': 21567, 'epoch': 3} {'type': 'loss', 'content': 0.04777391999959946, 'timestamp': '2025-10-01 04:47:25.099256', 'step': 21568, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:47:25.156047', 'step': 21568, 'epoch': 3} {'type': 'loss', 'content': 0.1328551173210144, 'timestamp': '2025-10-01 04:47:25.158152', 'step': 21569, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:25.213622', 'step': 21569, 'epoch': 3} {'type': 'loss', 'content': 0.04526353254914284, 'timestamp': '2025-10-01 04:47:25.215964', 'step': 21570, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:25.270884', 'step': 21570, 'epoch': 3} {'type': 'loss', 'content': 0.037072256207466125, 'timestamp': '2025-10-01 04:47:25.273475', 'step': 21571, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:47:25.332163', 'step': 21571, 'epoch': 3} {'type': 'loss', 'content': 0.08040221035480499, 'timestamp': '2025-10-01 04:47:25.338816', 'step': 21572, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:25.393524', 'step': 21572, 'epoch': 3} {'type': 'loss', 'content': 0.09973112493753433, 'timestamp': '2025-10-01 04:47:25.396071', 'step': 21573, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:25.450933', 'step': 21573, 'epoch': 3} {'type': 'loss', 'content': 0.05904793739318848, 'timestamp': '2025-10-01 04:47:25.453441', 'step': 21574, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:25.508673', 'step': 21574, 'epoch': 3} {'type': 'loss', 'content': 0.11897888034582138, 'timestamp': '2025-10-01 04:47:25.510565', 'step': 21575, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:25.564172', 'step': 21575, 'epoch': 3} {'type': 'loss', 'content': 0.030511178076267242, 'timestamp': '2025-10-01 04:47:25.570358', 'step': 21576, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:25.624636', 'step': 21576, 'epoch': 3} {'type': 'loss', 'content': 0.08403585851192474, 'timestamp': '2025-10-01 04:47:25.627439', 'step': 21577, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:47:25.681270', 'step': 21577, 'epoch': 3} {'type': 'loss', 'content': 0.14171627163887024, 'timestamp': '2025-10-01 04:47:25.683665', 'step': 21578, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:25.741740', 'step': 21578, 'epoch': 3} {'type': 'loss', 'content': 0.050419505685567856, 'timestamp': '2025-10-01 04:47:25.743950', 'step': 21579, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:25.797984', 'step': 21579, 'epoch': 3} {'type': 'loss', 'content': 0.07234176993370056, 'timestamp': '2025-10-01 04:47:25.804911', 'step': 21580, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:25.858553', 'step': 21580, 'epoch': 3} {'type': 'loss', 'content': 0.022066617384552956, 'timestamp': '2025-10-01 04:47:25.860951', 'step': 21581, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:47:25.915187', 'step': 21581, 'epoch': 3} {'type': 'loss', 'content': 0.051470812410116196, 'timestamp': '2025-10-01 04:47:25.917312', 'step': 21582, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:25.971751', 'step': 21582, 'epoch': 3} {'type': 'loss', 'content': 0.03505256772041321, 'timestamp': '2025-10-01 04:47:25.975933', 'step': 21583, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:26.032385', 'step': 21583, 'epoch': 3} {'type': 'loss', 'content': 0.09244545549154282, 'timestamp': '2025-10-01 04:47:26.038717', 'step': 21584, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:26.093949', 'step': 21584, 'epoch': 3} {'type': 'loss', 'content': 0.06218138337135315, 'timestamp': '2025-10-01 04:47:26.096567', 'step': 21585, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:26.150024', 'step': 21585, 'epoch': 3} {'type': 'loss', 'content': 0.05226263403892517, 'timestamp': '2025-10-01 04:47:26.153583', 'step': 21586, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:26.207305', 'step': 21586, 'epoch': 3} {'type': 'loss', 'content': 0.24656963348388672, 'timestamp': '2025-10-01 04:47:26.215647', 'step': 21587, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:26.270596', 'step': 21587, 'epoch': 3} {'type': 'loss', 'content': 0.060092948377132416, 'timestamp': '2025-10-01 04:47:26.277417', 'step': 21588, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:26.330880', 'step': 21588, 'epoch': 3} {'type': 'loss', 'content': 0.05370540916919708, 'timestamp': '2025-10-01 04:47:26.333355', 'step': 21589, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:26.387782', 'step': 21589, 'epoch': 3} {'type': 'loss', 'content': 0.08880838006734848, 'timestamp': '2025-10-01 04:47:26.390346', 'step': 21590, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:26.457985', 'step': 21590, 'epoch': 3} {'type': 'loss', 'content': 0.0070279925130307674, 'timestamp': '2025-10-01 04:47:26.460608', 'step': 21591, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:26.516142', 'step': 21591, 'epoch': 3} {'type': 'loss', 'content': 0.026794353500008583, 'timestamp': '2025-10-01 04:47:26.522945', 'step': 21592, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:26.580403', 'step': 21592, 'epoch': 3} {'type': 'loss', 'content': 0.03821282088756561, 'timestamp': '2025-10-01 04:47:26.583383', 'step': 21593, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:26.643989', 'step': 21593, 'epoch': 3} {'type': 'loss', 'content': 0.1078566461801529, 'timestamp': '2025-10-01 04:47:26.646637', 'step': 21594, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:26.706628', 'step': 21594, 'epoch': 3} {'type': 'loss', 'content': 0.13583000004291534, 'timestamp': '2025-10-01 04:47:26.709102', 'step': 21595, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:26.769073', 'step': 21595, 'epoch': 3} {'type': 'loss', 'content': 0.06040532886981964, 'timestamp': '2025-10-01 04:47:26.777096', 'step': 21596, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:47:26.835955', 'step': 21596, 'epoch': 3} {'type': 'loss', 'content': 0.04392601177096367, 'timestamp': '2025-10-01 04:47:26.838377', 'step': 21597, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:26.899080', 'step': 21597, 'epoch': 3} {'type': 'loss', 'content': 0.050972647964954376, 'timestamp': '2025-10-01 04:47:26.901919', 'step': 21598, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:26.960152', 'step': 21598, 'epoch': 3} {'type': 'loss', 'content': 0.09475576877593994, 'timestamp': '2025-10-01 04:47:26.962896', 'step': 21599, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:47:27.023958', 'step': 21599, 'epoch': 3} {'type': 'loss', 'content': 0.06332633644342422, 'timestamp': '2025-10-01 04:47:27.031178', 'step': 21600, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:27.090247', 'step': 21600, 'epoch': 3} {'type': 'loss', 'content': 0.05326896905899048, 'timestamp': '2025-10-01 04:47:27.092642', 'step': 21601, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:27.152543', 'step': 21601, 'epoch': 3} {'type': 'loss', 'content': 0.05204545706510544, 'timestamp': '2025-10-01 04:47:27.155071', 'step': 21602, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:27.213333', 'step': 21602, 'epoch': 3} {'type': 'loss', 'content': 0.07080399245023727, 'timestamp': '2025-10-01 04:47:27.216626', 'step': 21603, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:27.276520', 'step': 21603, 'epoch': 3} {'type': 'loss', 'content': 0.10156464576721191, 'timestamp': '2025-10-01 04:47:27.283917', 'step': 21604, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:47:27.343197', 'step': 21604, 'epoch': 3} {'type': 'loss', 'content': 0.09135814756155014, 'timestamp': '2025-10-01 04:47:27.345990', 'step': 21605, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:27.406102', 'step': 21605, 'epoch': 3} {'type': 'loss', 'content': 0.06844460964202881, 'timestamp': '2025-10-01 04:47:27.408528', 'step': 21606, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:27.465349', 'step': 21606, 'epoch': 3} {'type': 'loss', 'content': 0.07301044464111328, 'timestamp': '2025-10-01 04:47:27.467946', 'step': 21607, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:27.535199', 'step': 21607, 'epoch': 3} {'type': 'loss', 'content': 0.04006976634263992, 'timestamp': '2025-10-01 04:47:27.542240', 'step': 21608, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:27.602266', 'step': 21608, 'epoch': 3} {'type': 'loss', 'content': 0.16821683943271637, 'timestamp': '2025-10-01 04:47:27.619645', 'step': 21609, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:47:27.681782', 'step': 21609, 'epoch': 3} {'type': 'loss', 'content': 0.1328832060098648, 'timestamp': '2025-10-01 04:47:27.685811', 'step': 21610, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:27.757591', 'step': 21610, 'epoch': 3} {'type': 'loss', 'content': 0.0947435051202774, 'timestamp': '2025-10-01 04:47:27.760174', 'step': 21611, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:27.825297', 'step': 21611, 'epoch': 3} {'type': 'loss', 'content': 0.03759165480732918, 'timestamp': '2025-10-01 04:47:27.832686', 'step': 21612, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:27.893461', 'step': 21612, 'epoch': 3} {'type': 'loss', 'content': 0.0784294456243515, 'timestamp': '2025-10-01 04:47:27.895719', 'step': 21613, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:27.956667', 'step': 21613, 'epoch': 3} {'type': 'loss', 'content': 0.06835833191871643, 'timestamp': '2025-10-01 04:47:27.959055', 'step': 21614, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:28.018601', 'step': 21614, 'epoch': 3} {'type': 'loss', 'content': 0.055894628167152405, 'timestamp': '2025-10-01 04:47:28.021031', 'step': 21615, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:47:28.081925', 'step': 21615, 'epoch': 3} {'type': 'loss', 'content': 0.08491815626621246, 'timestamp': '2025-10-01 04:47:28.089002', 'step': 21616, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:28.147296', 'step': 21616, 'epoch': 3} {'type': 'loss', 'content': 0.09282350540161133, 'timestamp': '2025-10-01 04:47:28.149716', 'step': 21617, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:28.209155', 'step': 21617, 'epoch': 3} {'type': 'loss', 'content': 0.09177260100841522, 'timestamp': '2025-10-01 04:47:28.213358', 'step': 21618, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:47:28.272117', 'step': 21618, 'epoch': 3} {'type': 'loss', 'content': 0.10925308614969254, 'timestamp': '2025-10-01 04:47:28.274527', 'step': 21619, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:28.336342', 'step': 21619, 'epoch': 3} {'type': 'loss', 'content': 0.05859934166073799, 'timestamp': '2025-10-01 04:47:28.345101', 'step': 21620, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:28.403136', 'step': 21620, 'epoch': 3} {'type': 'loss', 'content': 0.08667592704296112, 'timestamp': '2025-10-01 04:47:28.405950', 'step': 21621, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:28.470387', 'step': 21621, 'epoch': 3} {'type': 'loss', 'content': 0.10825010389089584, 'timestamp': '2025-10-01 04:47:28.472603', 'step': 21622, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:47:28.527870', 'step': 21622, 'epoch': 3} {'type': 'loss', 'content': 0.04401877522468567, 'timestamp': '2025-10-01 04:47:28.530133', 'step': 21623, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:28.583838', 'step': 21623, 'epoch': 3} {'type': 'loss', 'content': 0.058480001986026764, 'timestamp': '2025-10-01 04:47:28.590660', 'step': 21624, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:28.644283', 'step': 21624, 'epoch': 3} {'type': 'loss', 'content': 0.14919692277908325, 'timestamp': '2025-10-01 04:47:28.646774', 'step': 21625, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:47:28.700020', 'step': 21625, 'epoch': 3} {'type': 'loss', 'content': 0.04951285570859909, 'timestamp': '2025-10-01 04:47:28.702220', 'step': 21626, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:28.755444', 'step': 21626, 'epoch': 3} {'type': 'loss', 'content': 0.09414136409759521, 'timestamp': '2025-10-01 04:47:28.758378', 'step': 21627, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:28.811745', 'step': 21627, 'epoch': 3} {'type': 'loss', 'content': 0.08800774067640305, 'timestamp': '2025-10-01 04:47:28.821306', 'step': 21628, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:28.874750', 'step': 21628, 'epoch': 3} {'type': 'loss', 'content': 0.013645857572555542, 'timestamp': '2025-10-01 04:47:28.876961', 'step': 21629, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:28.929832', 'step': 21629, 'epoch': 3} {'type': 'loss', 'content': 0.10661381483078003, 'timestamp': '2025-10-01 04:47:28.932033', 'step': 21630, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:47:28.985546', 'step': 21630, 'epoch': 3} {'type': 'loss', 'content': 0.2020157426595688, 'timestamp': '2025-10-01 04:47:28.987443', 'step': 21631, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:29.041208', 'step': 21631, 'epoch': 3} {'type': 'loss', 'content': 0.10083126276731491, 'timestamp': '2025-10-01 04:47:29.046973', 'step': 21632, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:29.099273', 'step': 21632, 'epoch': 3} {'type': 'loss', 'content': 0.05420905724167824, 'timestamp': '2025-10-01 04:47:29.101562', 'step': 21633, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:29.154824', 'step': 21633, 'epoch': 3} {'type': 'loss', 'content': 0.09985611587762833, 'timestamp': '2025-10-01 04:47:29.158281', 'step': 21634, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:47:29.211790', 'step': 21634, 'epoch': 3} {'type': 'loss', 'content': 0.03943576663732529, 'timestamp': '2025-10-01 04:47:29.214136', 'step': 21635, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:29.268127', 'step': 21635, 'epoch': 3} {'type': 'loss', 'content': 0.09326690435409546, 'timestamp': '2025-10-01 04:47:29.274077', 'step': 21636, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-01 04:47:29.327210', 'step': 21636, 'epoch': 3} {'type': 'loss', 'content': 0.09560129046440125, 'timestamp': '2025-10-01 04:47:29.329476', 'step': 21637, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:29.382424', 'step': 21637, 'epoch': 3} {'type': 'loss', 'content': 0.07586897909641266, 'timestamp': '2025-10-01 04:47:29.384773', 'step': 21638, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:47:29.438295', 'step': 21638, 'epoch': 3} {'type': 'loss', 'content': 0.09329269826412201, 'timestamp': '2025-10-01 04:47:29.440500', 'step': 21639, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:29.494351', 'step': 21639, 'epoch': 3} {'type': 'loss', 'content': 0.04485613480210304, 'timestamp': '2025-10-01 04:47:29.500417', 'step': 21640, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:29.553592', 'step': 21640, 'epoch': 3} {'type': 'loss', 'content': 0.06211746484041214, 'timestamp': '2025-10-01 04:47:29.555777', 'step': 21641, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:29.609421', 'step': 21641, 'epoch': 3} {'type': 'loss', 'content': 0.09786563366651535, 'timestamp': '2025-10-01 04:47:29.611711', 'step': 21642, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:29.665063', 'step': 21642, 'epoch': 3} {'type': 'loss', 'content': 0.10366066545248032, 'timestamp': '2025-10-01 04:47:29.667265', 'step': 21643, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:29.720557', 'step': 21643, 'epoch': 3} {'type': 'loss', 'content': 0.05320035666227341, 'timestamp': '2025-10-01 04:47:29.726242', 'step': 21644, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:29.778829', 'step': 21644, 'epoch': 3} {'type': 'loss', 'content': 0.02388330176472664, 'timestamp': '2025-10-01 04:47:29.784339', 'step': 21645, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:29.838395', 'step': 21645, 'epoch': 3} {'type': 'loss', 'content': 0.03086799383163452, 'timestamp': '2025-10-01 04:47:29.840629', 'step': 21646, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:29.893940', 'step': 21646, 'epoch': 3} {'type': 'loss', 'content': 0.06667187064886093, 'timestamp': '2025-10-01 04:47:29.896268', 'step': 21647, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:29.949269', 'step': 21647, 'epoch': 3} {'type': 'loss', 'content': 0.051496993750333786, 'timestamp': '2025-10-01 04:47:29.955001', 'step': 21648, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:30.010829', 'step': 21648, 'epoch': 3} {'type': 'loss', 'content': 0.10442662239074707, 'timestamp': '2025-10-01 04:47:30.013142', 'step': 21649, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:30.067883', 'step': 21649, 'epoch': 3} {'type': 'loss', 'content': 0.1875700205564499, 'timestamp': '2025-10-01 04:47:30.070128', 'step': 21650, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:30.123553', 'step': 21650, 'epoch': 3} {'type': 'loss', 'content': 0.1364511400461197, 'timestamp': '2025-10-01 04:47:30.125696', 'step': 21651, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:30.179283', 'step': 21651, 'epoch': 3} {'type': 'loss', 'content': 0.09631625562906265, 'timestamp': '2025-10-01 04:47:30.185103', 'step': 21652, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:30.244195', 'step': 21652, 'epoch': 3} {'type': 'loss', 'content': 0.08169743418693542, 'timestamp': '2025-10-01 04:47:30.246450', 'step': 21653, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:30.299525', 'step': 21653, 'epoch': 3} {'type': 'loss', 'content': 0.030142951756715775, 'timestamp': '2025-10-01 04:47:30.302290', 'step': 21654, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:30.355574', 'step': 21654, 'epoch': 3} {'type': 'loss', 'content': 0.03659934177994728, 'timestamp': '2025-10-01 04:47:30.357869', 'step': 21655, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:30.411091', 'step': 21655, 'epoch': 3} {'type': 'loss', 'content': 0.10278975963592529, 'timestamp': '2025-10-01 04:47:30.416850', 'step': 21656, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:30.469795', 'step': 21656, 'epoch': 3} {'type': 'loss', 'content': 0.08027026057243347, 'timestamp': '2025-10-01 04:47:30.473793', 'step': 21657, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:30.526969', 'step': 21657, 'epoch': 3} {'type': 'loss', 'content': 0.13001199066638947, 'timestamp': '2025-10-01 04:47:30.529269', 'step': 21658, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:30.582735', 'step': 21658, 'epoch': 3} {'type': 'loss', 'content': 0.06196529418230057, 'timestamp': '2025-10-01 04:47:30.586106', 'step': 21659, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:30.638845', 'step': 21659, 'epoch': 3} {'type': 'loss', 'content': 0.08272572606801987, 'timestamp': '2025-10-01 04:47:30.644803', 'step': 21660, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:30.697554', 'step': 21660, 'epoch': 3} {'type': 'loss', 'content': 0.10114102810621262, 'timestamp': '2025-10-01 04:47:30.701921', 'step': 21661, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:47:30.757449', 'step': 21661, 'epoch': 3} {'type': 'loss', 'content': 0.06802192330360413, 'timestamp': '2025-10-01 04:47:30.759662', 'step': 21662, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:30.823347', 'step': 21662, 'epoch': 3} {'type': 'loss', 'content': 0.09110483527183533, 'timestamp': '2025-10-01 04:47:30.825828', 'step': 21663, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:30.880672', 'step': 21663, 'epoch': 3} {'type': 'loss', 'content': 0.07369046658277512, 'timestamp': '2025-10-01 04:47:30.886590', 'step': 21664, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:30.940763', 'step': 21664, 'epoch': 3} {'type': 'loss', 'content': 0.05945439264178276, 'timestamp': '2025-10-01 04:47:30.943099', 'step': 21665, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:30.996228', 'step': 21665, 'epoch': 3} {'type': 'loss', 'content': 0.056930575519800186, 'timestamp': '2025-10-01 04:47:30.998963', 'step': 21666, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:31.053089', 'step': 21666, 'epoch': 3} {'type': 'loss', 'content': 0.09731201082468033, 'timestamp': '2025-10-01 04:47:31.069443', 'step': 21667, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:31.124559', 'step': 21667, 'epoch': 3} {'type': 'loss', 'content': 0.0692276880145073, 'timestamp': '2025-10-01 04:47:31.130959', 'step': 21668, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:31.191163', 'step': 21668, 'epoch': 3} {'type': 'loss', 'content': 0.025581080466508865, 'timestamp': '2025-10-01 04:47:31.193497', 'step': 21669, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:31.246596', 'step': 21669, 'epoch': 3} {'type': 'loss', 'content': 0.11273439973592758, 'timestamp': '2025-10-01 04:47:31.248847', 'step': 21670, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:31.302383', 'step': 21670, 'epoch': 3} {'type': 'loss', 'content': 0.04445312172174454, 'timestamp': '2025-10-01 04:47:31.304806', 'step': 21671, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:31.364535', 'step': 21671, 'epoch': 3} {'type': 'loss', 'content': 0.11098980903625488, 'timestamp': '2025-10-01 04:47:31.370911', 'step': 21672, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:31.425373', 'step': 21672, 'epoch': 3} {'type': 'loss', 'content': 0.12454475462436676, 'timestamp': '2025-10-01 04:47:31.427581', 'step': 21673, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:31.480866', 'step': 21673, 'epoch': 3} {'type': 'loss', 'content': 0.05788300558924675, 'timestamp': '2025-10-01 04:47:31.483910', 'step': 21674, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:31.538338', 'step': 21674, 'epoch': 3} {'type': 'loss', 'content': 0.06263987720012665, 'timestamp': '2025-10-01 04:47:31.540603', 'step': 21675, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:47:31.594205', 'step': 21675, 'epoch': 3} {'type': 'loss', 'content': 0.11085741221904755, 'timestamp': '2025-10-01 04:47:31.600064', 'step': 21676, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:31.652969', 'step': 21676, 'epoch': 3} {'type': 'loss', 'content': 0.07657450437545776, 'timestamp': '2025-10-01 04:47:31.655552', 'step': 21677, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:31.708632', 'step': 21677, 'epoch': 3} {'type': 'loss', 'content': 0.03638911619782448, 'timestamp': '2025-10-01 04:47:31.710993', 'step': 21678, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:31.765349', 'step': 21678, 'epoch': 3} {'type': 'loss', 'content': 0.09324605762958527, 'timestamp': '2025-10-01 04:47:31.768181', 'step': 21679, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:31.826773', 'step': 21679, 'epoch': 3} {'type': 'loss', 'content': 0.058532025665044785, 'timestamp': '2025-10-01 04:47:31.833914', 'step': 21680, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:31.889829', 'step': 21680, 'epoch': 3} {'type': 'loss', 'content': 0.11923205107450485, 'timestamp': '2025-10-01 04:47:31.892121', 'step': 21681, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:31.944903', 'step': 21681, 'epoch': 3} {'type': 'loss', 'content': 0.08681737631559372, 'timestamp': '2025-10-01 04:47:31.947087', 'step': 21682, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:47:32.000380', 'step': 21682, 'epoch': 3} {'type': 'loss', 'content': 0.048132315278053284, 'timestamp': '2025-10-01 04:47:32.002571', 'step': 21683, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:32.055358', 'step': 21683, 'epoch': 3} {'type': 'loss', 'content': 0.06791696697473526, 'timestamp': '2025-10-01 04:47:32.061213', 'step': 21684, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:32.115215', 'step': 21684, 'epoch': 3} {'type': 'loss', 'content': 0.1429973840713501, 'timestamp': '2025-10-01 04:47:32.117505', 'step': 21685, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:32.174335', 'step': 21685, 'epoch': 3} {'type': 'loss', 'content': 0.13364295661449432, 'timestamp': '2025-10-01 04:47:32.176670', 'step': 21686, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:32.229980', 'step': 21686, 'epoch': 3} {'type': 'loss', 'content': 0.12014632672071457, 'timestamp': '2025-10-01 04:47:32.232255', 'step': 21687, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:32.285490', 'step': 21687, 'epoch': 3} {'type': 'loss', 'content': 0.15806229412555695, 'timestamp': '2025-10-01 04:47:32.291832', 'step': 21688, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:32.344036', 'step': 21688, 'epoch': 3} {'type': 'loss', 'content': 0.028392259031534195, 'timestamp': '2025-10-01 04:47:32.346246', 'step': 21689, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:32.400865', 'step': 21689, 'epoch': 3} {'type': 'loss', 'content': 0.15313497185707092, 'timestamp': '2025-10-01 04:47:32.403311', 'step': 21690, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:47:32.457645', 'step': 21690, 'epoch': 3} {'type': 'loss', 'content': 0.08957859128713608, 'timestamp': '2025-10-01 04:47:32.459820', 'step': 21691, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:32.513669', 'step': 21691, 'epoch': 3} {'type': 'loss', 'content': 0.1269310712814331, 'timestamp': '2025-10-01 04:47:32.519641', 'step': 21692, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:47:32.573008', 'step': 21692, 'epoch': 3} {'type': 'loss', 'content': 0.039806023240089417, 'timestamp': '2025-10-01 04:47:32.575367', 'step': 21693, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:32.628529', 'step': 21693, 'epoch': 3} {'type': 'loss', 'content': 0.08383601903915405, 'timestamp': '2025-10-01 04:47:32.630747', 'step': 21694, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:32.683541', 'step': 21694, 'epoch': 3} {'type': 'loss', 'content': 0.01632363349199295, 'timestamp': '2025-10-01 04:47:32.685773', 'step': 21695, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:32.739618', 'step': 21695, 'epoch': 3} {'type': 'loss', 'content': 0.04748678207397461, 'timestamp': '2025-10-01 04:47:32.745858', 'step': 21696, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:32.799172', 'step': 21696, 'epoch': 3} {'type': 'loss', 'content': 0.045595791190862656, 'timestamp': '2025-10-01 04:47:32.801407', 'step': 21697, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:32.854615', 'step': 21697, 'epoch': 3} {'type': 'loss', 'content': 0.09518036991357803, 'timestamp': '2025-10-01 04:47:32.856901', 'step': 21698, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:32.910599', 'step': 21698, 'epoch': 3} {'type': 'loss', 'content': 0.06819752603769302, 'timestamp': '2025-10-01 04:47:32.912862', 'step': 21699, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:32.966801', 'step': 21699, 'epoch': 3} {'type': 'loss', 'content': 0.11179562658071518, 'timestamp': '2025-10-01 04:47:32.972609', 'step': 21700, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:33.025337', 'step': 21700, 'epoch': 3} {'type': 'loss', 'content': 0.039750609546899796, 'timestamp': '2025-10-01 04:47:33.027562', 'step': 21701, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:33.094126', 'step': 21701, 'epoch': 3} {'type': 'loss', 'content': 0.05106588453054428, 'timestamp': '2025-10-01 04:47:33.096995', 'step': 21702, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:33.150018', 'step': 21702, 'epoch': 3} {'type': 'loss', 'content': 0.13204127550125122, 'timestamp': '2025-10-01 04:47:33.152333', 'step': 21703, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:33.205668', 'step': 21703, 'epoch': 3} {'type': 'loss', 'content': 0.09475398808717728, 'timestamp': '2025-10-01 04:47:33.211609', 'step': 21704, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:33.264721', 'step': 21704, 'epoch': 3} {'type': 'loss', 'content': 0.09065011143684387, 'timestamp': '2025-10-01 04:47:33.284480', 'step': 21705, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:33.337707', 'step': 21705, 'epoch': 3} {'type': 'loss', 'content': 0.040884848684072495, 'timestamp': '2025-10-01 04:47:33.340014', 'step': 21706, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:33.393240', 'step': 21706, 'epoch': 3} {'type': 'loss', 'content': 0.11195290833711624, 'timestamp': '2025-10-01 04:47:33.395566', 'step': 21707, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:33.448682', 'step': 21707, 'epoch': 3} {'type': 'loss', 'content': 0.07567259669303894, 'timestamp': '2025-10-01 04:47:33.454529', 'step': 21708, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:33.507294', 'step': 21708, 'epoch': 3} {'type': 'loss', 'content': 0.08373189717531204, 'timestamp': '2025-10-01 04:47:33.509841', 'step': 21709, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:33.565102', 'step': 21709, 'epoch': 3} {'type': 'loss', 'content': 0.07064323127269745, 'timestamp': '2025-10-01 04:47:33.571208', 'step': 21710, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:33.624640', 'step': 21710, 'epoch': 3} {'type': 'loss', 'content': 0.08571530878543854, 'timestamp': '2025-10-01 04:47:33.627078', 'step': 21711, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:33.680131', 'step': 21711, 'epoch': 3} {'type': 'loss', 'content': 0.05108669027686119, 'timestamp': '2025-10-01 04:47:33.686206', 'step': 21712, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:47:33.742403', 'step': 21712, 'epoch': 3} {'type': 'loss', 'content': 0.0934976190328598, 'timestamp': '2025-10-01 04:47:33.747321', 'step': 21713, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:47:33.800859', 'step': 21713, 'epoch': 3} {'type': 'loss', 'content': 0.0869738981127739, 'timestamp': '2025-10-01 04:47:33.803309', 'step': 21714, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:33.856681', 'step': 21714, 'epoch': 3} {'type': 'loss', 'content': 0.049353595823049545, 'timestamp': '2025-10-01 04:47:33.858963', 'step': 21715, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:33.917224', 'step': 21715, 'epoch': 3} {'type': 'loss', 'content': 0.08296455442905426, 'timestamp': '2025-10-01 04:47:33.926520', 'step': 21716, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:33.979307', 'step': 21716, 'epoch': 3} {'type': 'loss', 'content': 0.08958736062049866, 'timestamp': '2025-10-01 04:47:33.981710', 'step': 21717, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:34.035452', 'step': 21717, 'epoch': 3} {'type': 'loss', 'content': 0.09476052969694138, 'timestamp': '2025-10-01 04:47:34.037631', 'step': 21718, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:34.090865', 'step': 21718, 'epoch': 3} {'type': 'loss', 'content': 0.06443863362073898, 'timestamp': '2025-10-01 04:47:34.102426', 'step': 21719, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:34.155185', 'step': 21719, 'epoch': 3} {'type': 'loss', 'content': 0.133345365524292, 'timestamp': '2025-10-01 04:47:34.161161', 'step': 21720, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:34.213895', 'step': 21720, 'epoch': 3} {'type': 'loss', 'content': 0.10938244313001633, 'timestamp': '2025-10-01 04:47:34.219776', 'step': 21721, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:34.273227', 'step': 21721, 'epoch': 3} {'type': 'loss', 'content': 0.13163058459758759, 'timestamp': '2025-10-01 04:47:34.277876', 'step': 21722, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:34.331303', 'step': 21722, 'epoch': 3} {'type': 'loss', 'content': 0.14650441706180573, 'timestamp': '2025-10-01 04:47:34.333552', 'step': 21723, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:34.387269', 'step': 21723, 'epoch': 3} {'type': 'loss', 'content': 0.05703384801745415, 'timestamp': '2025-10-01 04:47:34.394302', 'step': 21724, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:34.454116', 'step': 21724, 'epoch': 3} {'type': 'loss', 'content': 0.12116102129220963, 'timestamp': '2025-10-01 04:47:34.456468', 'step': 21725, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:34.509752', 'step': 21725, 'epoch': 3} {'type': 'loss', 'content': 0.10277111828327179, 'timestamp': '2025-10-01 04:47:34.512117', 'step': 21726, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:34.565683', 'step': 21726, 'epoch': 3} {'type': 'loss', 'content': 0.023379836231470108, 'timestamp': '2025-10-01 04:47:34.567907', 'step': 21727, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:34.621453', 'step': 21727, 'epoch': 3} {'type': 'loss', 'content': 0.06631869822740555, 'timestamp': '2025-10-01 04:47:34.627331', 'step': 21728, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:34.679147', 'step': 21728, 'epoch': 3} {'type': 'loss', 'content': 0.09172417223453522, 'timestamp': '2025-10-01 04:47:34.681486', 'step': 21729, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:34.747917', 'step': 21729, 'epoch': 3} {'type': 'loss', 'content': 0.08359891176223755, 'timestamp': '2025-10-01 04:47:34.750234', 'step': 21730, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:34.808655', 'step': 21730, 'epoch': 3} {'type': 'loss', 'content': 0.07296598702669144, 'timestamp': '2025-10-01 04:47:34.811367', 'step': 21731, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:34.871370', 'step': 21731, 'epoch': 3} {'type': 'loss', 'content': 0.11773737519979477, 'timestamp': '2025-10-01 04:47:34.877142', 'step': 21732, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:34.929938', 'step': 21732, 'epoch': 3} {'type': 'loss', 'content': 0.023075789213180542, 'timestamp': '2025-10-01 04:47:34.932188', 'step': 21733, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:34.985495', 'step': 21733, 'epoch': 3} {'type': 'loss', 'content': 0.028972404077649117, 'timestamp': '2025-10-01 04:47:34.987918', 'step': 21734, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:35.051463', 'step': 21734, 'epoch': 3} {'type': 'loss', 'content': 0.05926264822483063, 'timestamp': '2025-10-01 04:47:35.054027', 'step': 21735, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:35.107337', 'step': 21735, 'epoch': 3} {'type': 'loss', 'content': 0.06388174742460251, 'timestamp': '2025-10-01 04:47:35.113484', 'step': 21736, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:35.166292', 'step': 21736, 'epoch': 3} {'type': 'loss', 'content': 0.04905702918767929, 'timestamp': '2025-10-01 04:47:35.168767', 'step': 21737, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:35.222157', 'step': 21737, 'epoch': 3} {'type': 'loss', 'content': 0.05171588063240051, 'timestamp': '2025-10-01 04:47:35.224605', 'step': 21738, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:35.278087', 'step': 21738, 'epoch': 3} {'type': 'loss', 'content': 0.0996190682053566, 'timestamp': '2025-10-01 04:47:35.280497', 'step': 21739, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:35.334164', 'step': 21739, 'epoch': 3} {'type': 'loss', 'content': 0.058193061500787735, 'timestamp': '2025-10-01 04:47:35.339994', 'step': 21740, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:35.392367', 'step': 21740, 'epoch': 3} {'type': 'loss', 'content': 0.09212382882833481, 'timestamp': '2025-10-01 04:47:35.394841', 'step': 21741, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:35.459086', 'step': 21741, 'epoch': 3} {'type': 'loss', 'content': 0.06349590420722961, 'timestamp': '2025-10-01 04:47:35.461327', 'step': 21742, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:47:35.514911', 'step': 21742, 'epoch': 3} {'type': 'loss', 'content': 0.07875456660985947, 'timestamp': '2025-10-01 04:47:35.517887', 'step': 21743, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:47:35.575014', 'step': 21743, 'epoch': 3} {'type': 'loss', 'content': 0.10171157121658325, 'timestamp': '2025-10-01 04:47:35.581253', 'step': 21744, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:35.635128', 'step': 21744, 'epoch': 3} {'type': 'loss', 'content': 0.06057310476899147, 'timestamp': '2025-10-01 04:47:35.637523', 'step': 21745, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:35.690554', 'step': 21745, 'epoch': 3} {'type': 'loss', 'content': 0.1241016536951065, 'timestamp': '2025-10-01 04:47:35.692934', 'step': 21746, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:35.747653', 'step': 21746, 'epoch': 3} {'type': 'loss', 'content': 0.12382053583860397, 'timestamp': '2025-10-01 04:47:35.749902', 'step': 21747, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:47:35.803372', 'step': 21747, 'epoch': 3} {'type': 'loss', 'content': 0.05252637341618538, 'timestamp': '2025-10-01 04:47:35.821911', 'step': 21748, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:35.874876', 'step': 21748, 'epoch': 3} {'type': 'loss', 'content': 0.10134080797433853, 'timestamp': '2025-10-01 04:47:35.877234', 'step': 21749, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:47:35.930924', 'step': 21749, 'epoch': 3} {'type': 'loss', 'content': 0.06386532634496689, 'timestamp': '2025-10-01 04:47:35.933283', 'step': 21750, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:35.986500', 'step': 21750, 'epoch': 3} {'type': 'loss', 'content': 0.105209119617939, 'timestamp': '2025-10-01 04:47:35.988781', 'step': 21751, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:36.042382', 'step': 21751, 'epoch': 3} {'type': 'loss', 'content': 0.04862803593277931, 'timestamp': '2025-10-01 04:47:36.048166', 'step': 21752, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:36.100300', 'step': 21752, 'epoch': 3} {'type': 'loss', 'content': 0.12453393638134003, 'timestamp': '2025-10-01 04:47:36.102675', 'step': 21753, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:47:36.155588', 'step': 21753, 'epoch': 3} {'type': 'loss', 'content': 0.06015271320939064, 'timestamp': '2025-10-01 04:47:36.157839', 'step': 21754, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:36.211220', 'step': 21754, 'epoch': 3} {'type': 'loss', 'content': 0.06613799184560776, 'timestamp': '2025-10-01 04:47:36.213419', 'step': 21755, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:36.273664', 'step': 21755, 'epoch': 3} {'type': 'loss', 'content': 0.08294955641031265, 'timestamp': '2025-10-01 04:47:36.279438', 'step': 21756, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:47:36.333069', 'step': 21756, 'epoch': 3} {'type': 'loss', 'content': 0.09031810611486435, 'timestamp': '2025-10-01 04:47:36.335280', 'step': 21757, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:36.388111', 'step': 21757, 'epoch': 3} {'type': 'loss', 'content': 0.1111922338604927, 'timestamp': '2025-10-01 04:47:36.390337', 'step': 21758, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:47:36.444193', 'step': 21758, 'epoch': 3} {'type': 'loss', 'content': 0.10174796730279922, 'timestamp': '2025-10-01 04:47:36.446439', 'step': 21759, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:36.499461', 'step': 21759, 'epoch': 3} {'type': 'loss', 'content': 0.06108910217881203, 'timestamp': '2025-10-01 04:47:36.505442', 'step': 21760, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:47:36.566073', 'step': 21760, 'epoch': 3} {'type': 'loss', 'content': 0.08417574316263199, 'timestamp': '2025-10-01 04:47:36.568355', 'step': 21761, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:36.621576', 'step': 21761, 'epoch': 3} {'type': 'loss', 'content': 0.04660661518573761, 'timestamp': '2025-10-01 04:47:36.623789', 'step': 21762, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:36.678852', 'step': 21762, 'epoch': 3} {'type': 'loss', 'content': 0.11804409325122833, 'timestamp': '2025-10-01 04:47:36.682603', 'step': 21763, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:36.736281', 'step': 21763, 'epoch': 3} {'type': 'loss', 'content': 0.04238325357437134, 'timestamp': '2025-10-01 04:47:36.742337', 'step': 21764, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:36.795358', 'step': 21764, 'epoch': 3} {'type': 'loss', 'content': 0.11159085482358932, 'timestamp': '2025-10-01 04:47:36.797745', 'step': 21765, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:36.850473', 'step': 21765, 'epoch': 3} {'type': 'loss', 'content': 0.05607210844755173, 'timestamp': '2025-10-01 04:47:36.852770', 'step': 21766, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:36.911253', 'step': 21766, 'epoch': 3} {'type': 'loss', 'content': 0.0555993914604187, 'timestamp': '2025-10-01 04:47:36.913619', 'step': 21767, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:36.967198', 'step': 21767, 'epoch': 3} {'type': 'loss', 'content': 0.08436676859855652, 'timestamp': '2025-10-01 04:47:36.973186', 'step': 21768, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:37.030981', 'step': 21768, 'epoch': 3} {'type': 'loss', 'content': 0.08386481553316116, 'timestamp': '2025-10-01 04:47:37.033292', 'step': 21769, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:37.086422', 'step': 21769, 'epoch': 3} {'type': 'loss', 'content': 0.028588024899363518, 'timestamp': '2025-10-01 04:47:37.088813', 'step': 21770, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:37.147569', 'step': 21770, 'epoch': 3} {'type': 'loss', 'content': 0.03359637409448624, 'timestamp': '2025-10-01 04:47:37.149855', 'step': 21771, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:37.203132', 'step': 21771, 'epoch': 3} {'type': 'loss', 'content': 0.03196553513407707, 'timestamp': '2025-10-01 04:47:37.209004', 'step': 21772, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:37.261414', 'step': 21772, 'epoch': 3} {'type': 'loss', 'content': 0.08631658554077148, 'timestamp': '2025-10-01 04:47:37.263709', 'step': 21773, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:47:37.317104', 'step': 21773, 'epoch': 3} {'type': 'loss', 'content': 0.08532370626926422, 'timestamp': '2025-10-01 04:47:37.319412', 'step': 21774, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:37.372994', 'step': 21774, 'epoch': 3} {'type': 'loss', 'content': 0.09055633097887039, 'timestamp': '2025-10-01 04:47:37.375285', 'step': 21775, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:37.428568', 'step': 21775, 'epoch': 3} {'type': 'loss', 'content': 0.06911837309598923, 'timestamp': '2025-10-01 04:47:37.434301', 'step': 21776, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:37.486910', 'step': 21776, 'epoch': 3} {'type': 'loss', 'content': 0.05281667783856392, 'timestamp': '2025-10-01 04:47:37.489118', 'step': 21777, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:37.542224', 'step': 21777, 'epoch': 3} {'type': 'loss', 'content': 0.06707628071308136, 'timestamp': '2025-10-01 04:47:37.544659', 'step': 21778, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:47:37.597706', 'step': 21778, 'epoch': 3} {'type': 'loss', 'content': 0.05465076491236687, 'timestamp': '2025-10-01 04:47:37.600187', 'step': 21779, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:37.653379', 'step': 21779, 'epoch': 3} {'type': 'loss', 'content': 0.07036834955215454, 'timestamp': '2025-10-01 04:47:37.659126', 'step': 21780, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:37.711619', 'step': 21780, 'epoch': 3} {'type': 'loss', 'content': 0.08551301062107086, 'timestamp': '2025-10-01 04:47:37.713806', 'step': 21781, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:37.767111', 'step': 21781, 'epoch': 3} {'type': 'loss', 'content': 0.05731126666069031, 'timestamp': '2025-10-01 04:47:37.769298', 'step': 21782, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:37.822359', 'step': 21782, 'epoch': 3} {'type': 'loss', 'content': 0.14749658107757568, 'timestamp': '2025-10-01 04:47:37.824558', 'step': 21783, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:37.877535', 'step': 21783, 'epoch': 3} {'type': 'loss', 'content': 0.1589059978723526, 'timestamp': '2025-10-01 04:47:37.883261', 'step': 21784, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:37.936241', 'step': 21784, 'epoch': 3} {'type': 'loss', 'content': 0.04818476364016533, 'timestamp': '2025-10-01 04:47:37.938590', 'step': 21785, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:37.991312', 'step': 21785, 'epoch': 3} {'type': 'loss', 'content': 0.05410628765821457, 'timestamp': '2025-10-01 04:47:37.993569', 'step': 21786, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:38.052792', 'step': 21786, 'epoch': 3} {'type': 'loss', 'content': 0.09061326831579208, 'timestamp': '2025-10-01 04:47:38.055011', 'step': 21787, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:38.108409', 'step': 21787, 'epoch': 3} {'type': 'loss', 'content': 0.06560558825731277, 'timestamp': '2025-10-01 04:47:38.114567', 'step': 21788, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:38.166995', 'step': 21788, 'epoch': 3} {'type': 'loss', 'content': 0.09188800305128098, 'timestamp': '2025-10-01 04:47:38.175331', 'step': 21789, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:38.228755', 'step': 21789, 'epoch': 3} {'type': 'loss', 'content': 0.05066264420747757, 'timestamp': '2025-10-01 04:47:38.230993', 'step': 21790, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:38.283945', 'step': 21790, 'epoch': 3} {'type': 'loss', 'content': 0.12627865374088287, 'timestamp': '2025-10-01 04:47:38.286266', 'step': 21791, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:38.339218', 'step': 21791, 'epoch': 3} {'type': 'loss', 'content': 0.12524698674678802, 'timestamp': '2025-10-01 04:47:38.345149', 'step': 21792, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:38.397774', 'step': 21792, 'epoch': 3} {'type': 'loss', 'content': 0.1182696670293808, 'timestamp': '2025-10-01 04:47:38.400306', 'step': 21793, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:47:38.453158', 'step': 21793, 'epoch': 3} {'type': 'loss', 'content': 0.09846985340118408, 'timestamp': '2025-10-01 04:47:38.455485', 'step': 21794, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:38.511374', 'step': 21794, 'epoch': 3} {'type': 'loss', 'content': 0.12658026814460754, 'timestamp': '2025-10-01 04:47:38.515294', 'step': 21795, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:47:38.571409', 'step': 21795, 'epoch': 3} {'type': 'loss', 'content': 0.09353454411029816, 'timestamp': '2025-10-01 04:47:38.577243', 'step': 21796, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:38.629644', 'step': 21796, 'epoch': 3} {'type': 'loss', 'content': 0.1133572906255722, 'timestamp': '2025-10-01 04:47:38.631859', 'step': 21797, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:38.685218', 'step': 21797, 'epoch': 3} {'type': 'loss', 'content': 0.07903631031513214, 'timestamp': '2025-10-01 04:47:38.688334', 'step': 21798, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:38.741714', 'step': 21798, 'epoch': 3} {'type': 'loss', 'content': 0.05496842414140701, 'timestamp': '2025-10-01 04:47:38.743954', 'step': 21799, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:38.797324', 'step': 21799, 'epoch': 3} {'type': 'loss', 'content': 0.030330240726470947, 'timestamp': '2025-10-01 04:47:38.803317', 'step': 21800, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:47:38.856437', 'step': 21800, 'epoch': 3} {'type': 'loss', 'content': 0.07198034226894379, 'timestamp': '2025-10-01 04:47:38.859257', 'step': 21801, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:38.911934', 'step': 21801, 'epoch': 3} {'type': 'loss', 'content': 0.16379804909229279, 'timestamp': '2025-10-01 04:47:38.914199', 'step': 21802, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:38.967621', 'step': 21802, 'epoch': 3} {'type': 'loss', 'content': 0.05193077027797699, 'timestamp': '2025-10-01 04:47:38.969939', 'step': 21803, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:47:39.023137', 'step': 21803, 'epoch': 3} {'type': 'loss', 'content': 0.10339540988206863, 'timestamp': '2025-10-01 04:47:39.028923', 'step': 21804, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:39.082289', 'step': 21804, 'epoch': 3} {'type': 'loss', 'content': 0.07770807296037674, 'timestamp': '2025-10-01 04:47:39.084585', 'step': 21805, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:39.144025', 'step': 21805, 'epoch': 3} {'type': 'loss', 'content': 0.0655691921710968, 'timestamp': '2025-10-01 04:47:39.146402', 'step': 21806, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:39.199623', 'step': 21806, 'epoch': 3} {'type': 'loss', 'content': 0.06193453073501587, 'timestamp': '2025-10-01 04:47:39.202268', 'step': 21807, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:47:39.255768', 'step': 21807, 'epoch': 3} {'type': 'loss', 'content': 0.04003986716270447, 'timestamp': '2025-10-01 04:47:39.262237', 'step': 21808, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:39.315456', 'step': 21808, 'epoch': 3} {'type': 'loss', 'content': 0.06220650300383568, 'timestamp': '2025-10-01 04:47:39.319145', 'step': 21809, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:39.371960', 'step': 21809, 'epoch': 3} {'type': 'loss', 'content': 0.04792812466621399, 'timestamp': '2025-10-01 04:47:39.374722', 'step': 21810, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:39.428603', 'step': 21810, 'epoch': 3} {'type': 'loss', 'content': 0.09673161059617996, 'timestamp': '2025-10-01 04:47:39.430834', 'step': 21811, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:39.484109', 'step': 21811, 'epoch': 3} {'type': 'loss', 'content': 0.08986740559339523, 'timestamp': '2025-10-01 04:47:39.489869', 'step': 21812, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:39.542373', 'step': 21812, 'epoch': 3} {'type': 'loss', 'content': 0.14086157083511353, 'timestamp': '2025-10-01 04:47:39.544571', 'step': 21813, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:39.597726', 'step': 21813, 'epoch': 3} {'type': 'loss', 'content': 0.06456393003463745, 'timestamp': '2025-10-01 04:47:39.599933', 'step': 21814, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:39.654378', 'step': 21814, 'epoch': 3} {'type': 'loss', 'content': 0.10824402421712875, 'timestamp': '2025-10-01 04:47:39.656712', 'step': 21815, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:39.709473', 'step': 21815, 'epoch': 3} {'type': 'loss', 'content': 0.09064704924821854, 'timestamp': '2025-10-01 04:47:39.715455', 'step': 21816, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:39.768278', 'step': 21816, 'epoch': 3} {'type': 'loss', 'content': 0.07273653894662857, 'timestamp': '2025-10-01 04:47:39.770570', 'step': 21817, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:39.823902', 'step': 21817, 'epoch': 3} {'type': 'loss', 'content': 0.10585501790046692, 'timestamp': '2025-10-01 04:47:39.826176', 'step': 21818, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:39.879673', 'step': 21818, 'epoch': 3} {'type': 'loss', 'content': 0.0885196253657341, 'timestamp': '2025-10-01 04:47:39.881959', 'step': 21819, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:47:39.934916', 'step': 21819, 'epoch': 3} {'type': 'loss', 'content': 0.09176696836948395, 'timestamp': '2025-10-01 04:47:39.940802', 'step': 21820, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:47:39.994060', 'step': 21820, 'epoch': 3} {'type': 'loss', 'content': 0.03440925478935242, 'timestamp': '2025-10-01 04:47:39.996361', 'step': 21821, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:40.052971', 'step': 21821, 'epoch': 3} {'type': 'loss', 'content': 0.14499638974666595, 'timestamp': '2025-10-01 04:47:40.055502', 'step': 21822, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:47:40.109327', 'step': 21822, 'epoch': 3} {'type': 'loss', 'content': 0.13842341303825378, 'timestamp': '2025-10-01 04:47:40.111709', 'step': 21823, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:40.165021', 'step': 21823, 'epoch': 3} {'type': 'loss', 'content': 0.056879088282585144, 'timestamp': '2025-10-01 04:47:40.170952', 'step': 21824, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:40.230791', 'step': 21824, 'epoch': 3} {'type': 'loss', 'content': 0.10263087600469589, 'timestamp': '2025-10-01 04:47:40.232997', 'step': 21825, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:40.285551', 'step': 21825, 'epoch': 3} {'type': 'loss', 'content': 0.1435687243938446, 'timestamp': '2025-10-01 04:47:40.288385', 'step': 21826, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:47:40.342422', 'step': 21826, 'epoch': 3} {'type': 'loss', 'content': 0.09006627649068832, 'timestamp': '2025-10-01 04:47:40.344675', 'step': 21827, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:40.397870', 'step': 21827, 'epoch': 3} {'type': 'loss', 'content': 0.03686247020959854, 'timestamp': '2025-10-01 04:47:40.403841', 'step': 21828, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:40.456876', 'step': 21828, 'epoch': 3} {'type': 'loss', 'content': 0.08028196543455124, 'timestamp': '2025-10-01 04:47:40.459056', 'step': 21829, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:40.513360', 'step': 21829, 'epoch': 3} {'type': 'loss', 'content': 0.17392896115779877, 'timestamp': '2025-10-01 04:47:40.515656', 'step': 21830, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:40.569602', 'step': 21830, 'epoch': 3} {'type': 'loss', 'content': 0.06774117052555084, 'timestamp': '2025-10-01 04:47:40.571778', 'step': 21831, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:40.624740', 'step': 21831, 'epoch': 3} {'type': 'loss', 'content': 0.07301374524831772, 'timestamp': '2025-10-01 04:47:40.630543', 'step': 21832, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:40.684321', 'step': 21832, 'epoch': 3} {'type': 'loss', 'content': 0.077274389564991, 'timestamp': '2025-10-01 04:47:40.686519', 'step': 21833, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:40.739228', 'step': 21833, 'epoch': 3} {'type': 'loss', 'content': 0.035311777144670486, 'timestamp': '2025-10-01 04:47:40.741569', 'step': 21834, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:40.796071', 'step': 21834, 'epoch': 3} {'type': 'loss', 'content': 0.11418566852807999, 'timestamp': '2025-10-01 04:47:40.798482', 'step': 21835, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:40.851703', 'step': 21835, 'epoch': 3} {'type': 'loss', 'content': 0.0764649510383606, 'timestamp': '2025-10-01 04:47:40.857588', 'step': 21836, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:47:40.909961', 'step': 21836, 'epoch': 3} {'type': 'loss', 'content': 0.03289184719324112, 'timestamp': '2025-10-01 04:47:40.912149', 'step': 21837, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:40.964953', 'step': 21837, 'epoch': 3} {'type': 'loss', 'content': 0.024186845868825912, 'timestamp': '2025-10-01 04:47:40.967234', 'step': 21838, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:47:41.028052', 'step': 21838, 'epoch': 3} {'type': 'loss', 'content': 0.1035461574792862, 'timestamp': '2025-10-01 04:47:41.031078', 'step': 21839, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:41.085615', 'step': 21839, 'epoch': 3} {'type': 'loss', 'content': 0.03782563656568527, 'timestamp': '2025-10-01 04:47:41.091431', 'step': 21840, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:41.144068', 'step': 21840, 'epoch': 3} {'type': 'loss', 'content': 0.045325521379709244, 'timestamp': '2025-10-01 04:47:41.146253', 'step': 21841, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:41.199707', 'step': 21841, 'epoch': 3} {'type': 'loss', 'content': 0.06756135821342468, 'timestamp': '2025-10-01 04:47:41.201906', 'step': 21842, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:41.256603', 'step': 21842, 'epoch': 3} {'type': 'loss', 'content': 0.07178125530481339, 'timestamp': '2025-10-01 04:47:41.258798', 'step': 21843, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:41.311851', 'step': 21843, 'epoch': 3} {'type': 'loss', 'content': 0.07094612717628479, 'timestamp': '2025-10-01 04:47:41.318068', 'step': 21844, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:41.371477', 'step': 21844, 'epoch': 3} {'type': 'loss', 'content': 0.08170682936906815, 'timestamp': '2025-10-01 04:47:41.375456', 'step': 21845, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:47:41.432610', 'step': 21845, 'epoch': 3} {'type': 'loss', 'content': 0.06370154768228531, 'timestamp': '2025-10-01 04:47:41.435007', 'step': 21846, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:41.490658', 'step': 21846, 'epoch': 3} {'type': 'loss', 'content': 0.05459672212600708, 'timestamp': '2025-10-01 04:47:41.493085', 'step': 21847, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:41.546799', 'step': 21847, 'epoch': 3} {'type': 'loss', 'content': 0.07270141690969467, 'timestamp': '2025-10-01 04:47:41.553162', 'step': 21848, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:41.607381', 'step': 21848, 'epoch': 3} {'type': 'loss', 'content': 0.11494126915931702, 'timestamp': '2025-10-01 04:47:41.610000', 'step': 21849, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:41.664193', 'step': 21849, 'epoch': 3} {'type': 'loss', 'content': 0.08365955203771591, 'timestamp': '2025-10-01 04:47:41.666536', 'step': 21850, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:41.720687', 'step': 21850, 'epoch': 3} {'type': 'loss', 'content': 0.04535103216767311, 'timestamp': '2025-10-01 04:47:41.722984', 'step': 21851, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:41.782581', 'step': 21851, 'epoch': 3} {'type': 'loss', 'content': 0.032571230083703995, 'timestamp': '2025-10-01 04:47:41.788919', 'step': 21852, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:47:41.842408', 'step': 21852, 'epoch': 3} {'type': 'loss', 'content': 0.036013904958963394, 'timestamp': '2025-10-01 04:47:41.844681', 'step': 21853, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:41.898116', 'step': 21853, 'epoch': 3} {'type': 'loss', 'content': 0.07339523732662201, 'timestamp': '2025-10-01 04:47:41.900063', 'step': 21854, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:41.955826', 'step': 21854, 'epoch': 3} {'type': 'loss', 'content': 0.04462883621454239, 'timestamp': '2025-10-01 04:47:41.958657', 'step': 21855, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:42.012655', 'step': 21855, 'epoch': 3} {'type': 'loss', 'content': 0.11723841726779938, 'timestamp': '2025-10-01 04:47:42.019152', 'step': 21856, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:42.072351', 'step': 21856, 'epoch': 3} {'type': 'loss', 'content': 0.05185723677277565, 'timestamp': '2025-10-01 04:47:42.074856', 'step': 21857, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:42.128362', 'step': 21857, 'epoch': 3} {'type': 'loss', 'content': 0.05617785081267357, 'timestamp': '2025-10-01 04:47:42.130774', 'step': 21858, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:47:42.185274', 'step': 21858, 'epoch': 3} {'type': 'loss', 'content': 0.052565086632966995, 'timestamp': '2025-10-01 04:47:42.188028', 'step': 21859, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:42.242550', 'step': 21859, 'epoch': 3} {'type': 'loss', 'content': 0.07262558490037918, 'timestamp': '2025-10-01 04:47:42.249063', 'step': 21860, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:42.302539', 'step': 21860, 'epoch': 3} {'type': 'loss', 'content': 0.09589264541864395, 'timestamp': '2025-10-01 04:47:42.307171', 'step': 21861, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:42.360984', 'step': 21861, 'epoch': 3} {'type': 'loss', 'content': 0.06357982009649277, 'timestamp': '2025-10-01 04:47:42.363697', 'step': 21862, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:42.417725', 'step': 21862, 'epoch': 3} {'type': 'loss', 'content': 0.08607614040374756, 'timestamp': '2025-10-01 04:47:42.420421', 'step': 21863, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:42.474848', 'step': 21863, 'epoch': 3} {'type': 'loss', 'content': 0.06798616051673889, 'timestamp': '2025-10-01 04:47:42.481504', 'step': 21864, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:42.543429', 'step': 21864, 'epoch': 3} {'type': 'loss', 'content': 0.04327865317463875, 'timestamp': '2025-10-01 04:47:42.546072', 'step': 21865, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:42.599795', 'step': 21865, 'epoch': 3} {'type': 'loss', 'content': 0.08674705773591995, 'timestamp': '2025-10-01 04:47:42.602732', 'step': 21866, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:42.658712', 'step': 21866, 'epoch': 3} {'type': 'loss', 'content': 0.0664479210972786, 'timestamp': '2025-10-01 04:47:42.661429', 'step': 21867, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:47:42.716348', 'step': 21867, 'epoch': 3} {'type': 'loss', 'content': 0.05407366156578064, 'timestamp': '2025-10-01 04:47:42.722992', 'step': 21868, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:42.778413', 'step': 21868, 'epoch': 3} {'type': 'loss', 'content': 0.08471200615167618, 'timestamp': '2025-10-01 04:47:42.782828', 'step': 21869, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:47:42.838008', 'step': 21869, 'epoch': 3} {'type': 'loss', 'content': 0.15144729614257812, 'timestamp': '2025-10-01 04:47:42.840549', 'step': 21870, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:42.895664', 'step': 21870, 'epoch': 3} {'type': 'loss', 'content': 0.09185972064733505, 'timestamp': '2025-10-01 04:47:42.898155', 'step': 21871, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:42.952717', 'step': 21871, 'epoch': 3} {'type': 'loss', 'content': 0.032716304063797, 'timestamp': '2025-10-01 04:47:42.959947', 'step': 21872, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:43.013791', 'step': 21872, 'epoch': 3} {'type': 'loss', 'content': 0.1123165562748909, 'timestamp': '2025-10-01 04:47:43.017167', 'step': 21873, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:43.071405', 'step': 21873, 'epoch': 3} {'type': 'loss', 'content': 0.07399728149175644, 'timestamp': '2025-10-01 04:47:43.074001', 'step': 21874, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:47:43.130897', 'step': 21874, 'epoch': 3} {'type': 'loss', 'content': 0.15946143865585327, 'timestamp': '2025-10-01 04:47:43.133659', 'step': 21875, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:43.191137', 'step': 21875, 'epoch': 3} {'type': 'loss', 'content': 0.04097986966371536, 'timestamp': '2025-10-01 04:47:43.198151', 'step': 21876, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:43.257479', 'step': 21876, 'epoch': 3} {'type': 'loss', 'content': 0.20475558936595917, 'timestamp': '2025-10-01 04:47:43.263307', 'step': 21877, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:43.332606', 'step': 21877, 'epoch': 3} {'type': 'loss', 'content': 0.1009928435087204, 'timestamp': '2025-10-01 04:47:43.335433', 'step': 21878, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:43.395489', 'step': 21878, 'epoch': 3} {'type': 'loss', 'content': 0.08439817279577255, 'timestamp': '2025-10-01 04:47:43.399198', 'step': 21879, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:47:43.458033', 'step': 21879, 'epoch': 3} {'type': 'loss', 'content': 0.09722767025232315, 'timestamp': '2025-10-01 04:47:43.464788', 'step': 21880, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:43.522390', 'step': 21880, 'epoch': 3} {'type': 'loss', 'content': 0.12825781106948853, 'timestamp': '2025-10-01 04:47:43.524623', 'step': 21881, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:43.582781', 'step': 21881, 'epoch': 3} {'type': 'loss', 'content': 0.10180897265672684, 'timestamp': '2025-10-01 04:47:43.585241', 'step': 21882, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:43.644468', 'step': 21882, 'epoch': 3} {'type': 'loss', 'content': 0.054666224867105484, 'timestamp': '2025-10-01 04:47:43.646734', 'step': 21883, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:43.705079', 'step': 21883, 'epoch': 3} {'type': 'loss', 'content': 0.02682102844119072, 'timestamp': '2025-10-01 04:47:43.712119', 'step': 21884, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:43.768508', 'step': 21884, 'epoch': 3} {'type': 'loss', 'content': 0.09202533960342407, 'timestamp': '2025-10-01 04:47:43.770885', 'step': 21885, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:43.830298', 'step': 21885, 'epoch': 3} {'type': 'loss', 'content': 0.03198448568582535, 'timestamp': '2025-10-01 04:47:43.832688', 'step': 21886, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:43.891682', 'step': 21886, 'epoch': 3} {'type': 'loss', 'content': 0.06420847028493881, 'timestamp': '2025-10-01 04:47:43.894092', 'step': 21887, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:43.952211', 'step': 21887, 'epoch': 3} {'type': 'loss', 'content': 0.08533336222171783, 'timestamp': '2025-10-01 04:47:43.959322', 'step': 21888, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:44.016406', 'step': 21888, 'epoch': 3} {'type': 'loss', 'content': 0.02057759091258049, 'timestamp': '2025-10-01 04:47:44.018653', 'step': 21889, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:44.077086', 'step': 21889, 'epoch': 3} {'type': 'loss', 'content': 0.07956895232200623, 'timestamp': '2025-10-01 04:47:44.079554', 'step': 21890, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:44.136335', 'step': 21890, 'epoch': 3} {'type': 'loss', 'content': 0.12329915165901184, 'timestamp': '2025-10-01 04:47:44.138738', 'step': 21891, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:44.196823', 'step': 21891, 'epoch': 3} {'type': 'loss', 'content': 0.07061953097581863, 'timestamp': '2025-10-01 04:47:44.203764', 'step': 21892, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:47:44.263132', 'step': 21892, 'epoch': 3} {'type': 'loss', 'content': 0.16817989945411682, 'timestamp': '2025-10-01 04:47:44.265711', 'step': 21893, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:44.322474', 'step': 21893, 'epoch': 3} {'type': 'loss', 'content': 0.09763409942388535, 'timestamp': '2025-10-01 04:47:44.325234', 'step': 21894, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:44.384063', 'step': 21894, 'epoch': 3} {'type': 'loss', 'content': 0.12012680619955063, 'timestamp': '2025-10-01 04:47:44.386394', 'step': 21895, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:44.443063', 'step': 21895, 'epoch': 3} {'type': 'loss', 'content': 0.07332152873277664, 'timestamp': '2025-10-01 04:47:44.450114', 'step': 21896, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:44.511015', 'step': 21896, 'epoch': 3} {'type': 'loss', 'content': 0.10197144001722336, 'timestamp': '2025-10-01 04:47:44.513306', 'step': 21897, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:44.570733', 'step': 21897, 'epoch': 3} {'type': 'loss', 'content': 0.17818693816661835, 'timestamp': '2025-10-01 04:47:44.573069', 'step': 21898, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:44.630088', 'step': 21898, 'epoch': 3} {'type': 'loss', 'content': 0.08649200946092606, 'timestamp': '2025-10-01 04:47:44.632734', 'step': 21899, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:44.688904', 'step': 21899, 'epoch': 3} {'type': 'loss', 'content': 0.08782924711704254, 'timestamp': '2025-10-01 04:47:44.696154', 'step': 21900, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:44.754655', 'step': 21900, 'epoch': 3} {'type': 'loss', 'content': 0.06682343035936356, 'timestamp': '2025-10-01 04:47:44.756964', 'step': 21901, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:44.809984', 'step': 21901, 'epoch': 3} {'type': 'loss', 'content': 0.02543204091489315, 'timestamp': '2025-10-01 04:47:44.812373', 'step': 21902, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-01 04:47:57.518993', 'step': 21902, 'epoch': 3} {'type': 'pplx', 'content': 9251.929005807171, 'timestamp': '2025-10-01 04:47:57.522311', 'step': 21902, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:57.577932', 'step': 21902, 'epoch': 3} {'type': 'loss', 'content': 0.13021109998226166, 'timestamp': '2025-10-01 04:47:57.580242', 'step': 21903, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:57.635363', 'step': 21903, 'epoch': 3} {'type': 'loss', 'content': 0.03823184594511986, 'timestamp': '2025-10-01 04:47:57.643291', 'step': 21904, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:57.701922', 'step': 21904, 'epoch': 3} {'type': 'loss', 'content': 0.0594414547085762, 'timestamp': '2025-10-01 04:47:57.704141', 'step': 21905, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:47:57.758842', 'step': 21905, 'epoch': 3} {'type': 'loss', 'content': 0.0928931012749672, 'timestamp': '2025-10-01 04:47:57.761041', 'step': 21906, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:57.815868', 'step': 21906, 'epoch': 3} {'type': 'loss', 'content': 0.10659573972225189, 'timestamp': '2025-10-01 04:47:57.818060', 'step': 21907, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:57.875350', 'step': 21907, 'epoch': 3} {'type': 'loss', 'content': 0.05537920817732811, 'timestamp': '2025-10-01 04:47:57.881818', 'step': 21908, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:57.935218', 'step': 21908, 'epoch': 3} {'type': 'loss', 'content': 0.03636762872338295, 'timestamp': '2025-10-01 04:47:57.937393', 'step': 21909, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:57.992166', 'step': 21909, 'epoch': 3} {'type': 'loss', 'content': 0.11068496853113174, 'timestamp': '2025-10-01 04:47:57.994660', 'step': 21910, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:58.051602', 'step': 21910, 'epoch': 3} {'type': 'loss', 'content': 0.15071052312850952, 'timestamp': '2025-10-01 04:47:58.053913', 'step': 21911, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:58.109587', 'step': 21911, 'epoch': 3} {'type': 'loss', 'content': 0.04795359820127487, 'timestamp': '2025-10-01 04:47:58.115558', 'step': 21912, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:58.169367', 'step': 21912, 'epoch': 3} {'type': 'loss', 'content': 0.1183476373553276, 'timestamp': '2025-10-01 04:47:58.172069', 'step': 21913, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:47:58.226835', 'step': 21913, 'epoch': 3} {'type': 'loss', 'content': 0.03556199371814728, 'timestamp': '2025-10-01 04:47:58.229095', 'step': 21914, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:58.282817', 'step': 21914, 'epoch': 3} {'type': 'loss', 'content': 0.18617083132266998, 'timestamp': '2025-10-01 04:47:58.285718', 'step': 21915, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:58.339438', 'step': 21915, 'epoch': 3} {'type': 'loss', 'content': 0.06338950991630554, 'timestamp': '2025-10-01 04:47:58.347667', 'step': 21916, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:47:58.401000', 'step': 21916, 'epoch': 3} {'type': 'loss', 'content': 0.05671490728855133, 'timestamp': '2025-10-01 04:47:58.403366', 'step': 21917, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:58.470353', 'step': 21917, 'epoch': 3} {'type': 'loss', 'content': 0.12009923160076141, 'timestamp': '2025-10-01 04:47:58.472887', 'step': 21918, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:58.527023', 'step': 21918, 'epoch': 3} {'type': 'loss', 'content': 0.0494937002658844, 'timestamp': '2025-10-01 04:47:58.529969', 'step': 21919, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:58.583359', 'step': 21919, 'epoch': 3} {'type': 'loss', 'content': 0.13612110912799835, 'timestamp': '2025-10-01 04:47:58.589759', 'step': 21920, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:58.643190', 'step': 21920, 'epoch': 3} {'type': 'loss', 'content': 0.00850397814065218, 'timestamp': '2025-10-01 04:47:58.645436', 'step': 21921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:58.699819', 'step': 21921, 'epoch': 3} {'type': 'loss', 'content': 0.07361430674791336, 'timestamp': '2025-10-01 04:47:58.702031', 'step': 21922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:58.764737', 'step': 21922, 'epoch': 3} {'type': 'loss', 'content': 0.12063853442668915, 'timestamp': '2025-10-01 04:47:58.768763', 'step': 21923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:58.823046', 'step': 21923, 'epoch': 3} {'type': 'loss', 'content': 0.10596368461847305, 'timestamp': '2025-10-01 04:47:58.829179', 'step': 21924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:58.891140', 'step': 21924, 'epoch': 3} {'type': 'loss', 'content': 0.06932273507118225, 'timestamp': '2025-10-01 04:47:58.894065', 'step': 21925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:58.947174', 'step': 21925, 'epoch': 3} {'type': 'loss', 'content': 0.07142581790685654, 'timestamp': '2025-10-01 04:47:58.949397', 'step': 21926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:59.002237', 'step': 21926, 'epoch': 3} {'type': 'loss', 'content': 0.0910373106598854, 'timestamp': '2025-10-01 04:47:59.004609', 'step': 21927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:59.058293', 'step': 21927, 'epoch': 3} {'type': 'loss', 'content': 0.06041346862912178, 'timestamp': '2025-10-01 04:47:59.064322', 'step': 21928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:59.118085', 'step': 21928, 'epoch': 3} {'type': 'loss', 'content': 0.14752505719661713, 'timestamp': '2025-10-01 04:47:59.120629', 'step': 21929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:59.178125', 'step': 21929, 'epoch': 3} {'type': 'loss', 'content': 0.07576189935207367, 'timestamp': '2025-10-01 04:47:59.180648', 'step': 21930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:59.237659', 'step': 21930, 'epoch': 3} {'type': 'loss', 'content': 0.07001674920320511, 'timestamp': '2025-10-01 04:47:59.240598', 'step': 21931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:59.316006', 'step': 21931, 'epoch': 3} {'type': 'loss', 'content': 0.10958744585514069, 'timestamp': '2025-10-01 04:47:59.322866', 'step': 21932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:59.379316', 'step': 21932, 'epoch': 3} {'type': 'loss', 'content': 0.1186123788356781, 'timestamp': '2025-10-01 04:47:59.381712', 'step': 21933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:59.442095', 'step': 21933, 'epoch': 3} {'type': 'loss', 'content': 0.11559686809778214, 'timestamp': '2025-10-01 04:47:59.444415', 'step': 21934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:59.500412', 'step': 21934, 'epoch': 3} {'type': 'loss', 'content': 0.06580830365419388, 'timestamp': '2025-10-01 04:47:59.502742', 'step': 21935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:59.559890', 'step': 21935, 'epoch': 3} {'type': 'loss', 'content': 0.08873343467712402, 'timestamp': '2025-10-01 04:47:59.566636', 'step': 21936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:59.622745', 'step': 21936, 'epoch': 3} {'type': 'loss', 'content': 0.1266082227230072, 'timestamp': '2025-10-01 04:47:59.625255', 'step': 21937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:59.681481', 'step': 21937, 'epoch': 3} {'type': 'loss', 'content': 0.14584603905677795, 'timestamp': '2025-10-01 04:47:59.683801', 'step': 21938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:47:59.739748', 'step': 21938, 'epoch': 3} {'type': 'loss', 'content': 0.08027172088623047, 'timestamp': '2025-10-01 04:47:59.741992', 'step': 21939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:47:59.797117', 'step': 21939, 'epoch': 3} {'type': 'loss', 'content': 0.07761611044406891, 'timestamp': '2025-10-01 04:47:59.803552', 'step': 21940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:59.856473', 'step': 21940, 'epoch': 3} {'type': 'loss', 'content': 0.11271513998508453, 'timestamp': '2025-10-01 04:47:59.859206', 'step': 21941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:47:59.915004', 'step': 21941, 'epoch': 3} {'type': 'loss', 'content': 0.07746772468090057, 'timestamp': '2025-10-01 04:47:59.917197', 'step': 21942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:47:59.970734', 'step': 21942, 'epoch': 3} {'type': 'loss', 'content': 0.062418822199106216, 'timestamp': '2025-10-01 04:47:59.972950', 'step': 21943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:00.026128', 'step': 21943, 'epoch': 3} {'type': 'loss', 'content': 0.025689469650387764, 'timestamp': '2025-10-01 04:48:00.031889', 'step': 21944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:00.084946', 'step': 21944, 'epoch': 3} {'type': 'loss', 'content': 0.11474813520908356, 'timestamp': '2025-10-01 04:48:00.087278', 'step': 21945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:00.141068', 'step': 21945, 'epoch': 3} {'type': 'loss', 'content': 0.11174676567316055, 'timestamp': '2025-10-01 04:48:00.148340', 'step': 21946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:00.202391', 'step': 21946, 'epoch': 3} {'type': 'loss', 'content': 0.08397745341062546, 'timestamp': '2025-10-01 04:48:00.205667', 'step': 21947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:00.259616', 'step': 21947, 'epoch': 3} {'type': 'loss', 'content': 0.04913059622049332, 'timestamp': '2025-10-01 04:48:00.265761', 'step': 21948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:48:00.319745', 'step': 21948, 'epoch': 3} {'type': 'loss', 'content': 0.07451951503753662, 'timestamp': '2025-10-01 04:48:00.328189', 'step': 21949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:00.381811', 'step': 21949, 'epoch': 3} {'type': 'loss', 'content': 0.08241653442382812, 'timestamp': '2025-10-01 04:48:00.384039', 'step': 21950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:00.437702', 'step': 21950, 'epoch': 3} {'type': 'loss', 'content': 0.0404227115213871, 'timestamp': '2025-10-01 04:48:00.439932', 'step': 21951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:48:00.493199', 'step': 21951, 'epoch': 3} {'type': 'loss', 'content': 0.046050820499658585, 'timestamp': '2025-10-01 04:48:00.499067', 'step': 21952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:00.552005', 'step': 21952, 'epoch': 3} {'type': 'loss', 'content': 0.07190267741680145, 'timestamp': '2025-10-01 04:48:00.554322', 'step': 21953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:00.607924', 'step': 21953, 'epoch': 3} {'type': 'loss', 'content': 0.0797111988067627, 'timestamp': '2025-10-01 04:48:00.610109', 'step': 21954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:00.663956', 'step': 21954, 'epoch': 3} {'type': 'loss', 'content': 0.06171640381217003, 'timestamp': '2025-10-01 04:48:00.666157', 'step': 21955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:00.719179', 'step': 21955, 'epoch': 3} {'type': 'loss', 'content': 0.059277478605508804, 'timestamp': '2025-10-01 04:48:00.725050', 'step': 21956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:48:00.777759', 'step': 21956, 'epoch': 3} {'type': 'loss', 'content': 0.07701250910758972, 'timestamp': '2025-10-01 04:48:00.780439', 'step': 21957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:00.833920', 'step': 21957, 'epoch': 3} {'type': 'loss', 'content': 0.07285923510789871, 'timestamp': '2025-10-01 04:48:00.836375', 'step': 21958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:48:00.889778', 'step': 21958, 'epoch': 3} {'type': 'loss', 'content': 0.1108350157737732, 'timestamp': '2025-10-01 04:48:00.892291', 'step': 21959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:00.946030', 'step': 21959, 'epoch': 3} {'type': 'loss', 'content': 0.09781727939844131, 'timestamp': '2025-10-01 04:48:00.951969', 'step': 21960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:01.004467', 'step': 21960, 'epoch': 3} {'type': 'loss', 'content': 0.053672581911087036, 'timestamp': '2025-10-01 04:48:01.006998', 'step': 21961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:01.060350', 'step': 21961, 'epoch': 3} {'type': 'loss', 'content': 0.14985790848731995, 'timestamp': '2025-10-01 04:48:01.062815', 'step': 21962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:01.116528', 'step': 21962, 'epoch': 3} {'type': 'loss', 'content': 0.13723482191562653, 'timestamp': '2025-10-01 04:48:01.118713', 'step': 21963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:01.171814', 'step': 21963, 'epoch': 3} {'type': 'loss', 'content': 0.0817846804857254, 'timestamp': '2025-10-01 04:48:01.177663', 'step': 21964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:01.230230', 'step': 21964, 'epoch': 3} {'type': 'loss', 'content': 0.06353814899921417, 'timestamp': '2025-10-01 04:48:01.233656', 'step': 21965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:01.291280', 'step': 21965, 'epoch': 3} {'type': 'loss', 'content': 0.031923506408929825, 'timestamp': '2025-10-01 04:48:01.293798', 'step': 21966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:01.349487', 'step': 21966, 'epoch': 3} {'type': 'loss', 'content': 0.10928300768136978, 'timestamp': '2025-10-01 04:48:01.351803', 'step': 21967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:01.404493', 'step': 21967, 'epoch': 3} {'type': 'loss', 'content': 0.08388029783964157, 'timestamp': '2025-10-01 04:48:01.410247', 'step': 21968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:01.462593', 'step': 21968, 'epoch': 3} {'type': 'loss', 'content': 0.07679443806409836, 'timestamp': '2025-10-01 04:48:01.464701', 'step': 21969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:01.518076', 'step': 21969, 'epoch': 3} {'type': 'loss', 'content': 0.054584402590990067, 'timestamp': '2025-10-01 04:48:01.520282', 'step': 21970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:01.573251', 'step': 21970, 'epoch': 3} {'type': 'loss', 'content': 0.12050279974937439, 'timestamp': '2025-10-01 04:48:01.575393', 'step': 21971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:48:01.628334', 'step': 21971, 'epoch': 3} {'type': 'loss', 'content': 0.15029120445251465, 'timestamp': '2025-10-01 04:48:01.634252', 'step': 21972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:01.686972', 'step': 21972, 'epoch': 3} {'type': 'loss', 'content': 0.13374289870262146, 'timestamp': '2025-10-01 04:48:01.689181', 'step': 21973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:48:01.743195', 'step': 21973, 'epoch': 3} {'type': 'loss', 'content': 0.07974988222122192, 'timestamp': '2025-10-01 04:48:01.745537', 'step': 21974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:01.798446', 'step': 21974, 'epoch': 3} {'type': 'loss', 'content': 0.12259279191493988, 'timestamp': '2025-10-01 04:48:01.800668', 'step': 21975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:01.853930', 'step': 21975, 'epoch': 3} {'type': 'loss', 'content': 0.05094810575246811, 'timestamp': '2025-10-01 04:48:01.859708', 'step': 21976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:01.912308', 'step': 21976, 'epoch': 3} {'type': 'loss', 'content': 0.1282789260149002, 'timestamp': '2025-10-01 04:48:01.914380', 'step': 21977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:48:01.967154', 'step': 21977, 'epoch': 3} {'type': 'loss', 'content': 0.13814443349838257, 'timestamp': '2025-10-01 04:48:01.969430', 'step': 21978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:48:02.024260', 'step': 21978, 'epoch': 3} {'type': 'loss', 'content': 0.1669546216726303, 'timestamp': '2025-10-01 04:48:02.026460', 'step': 21979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:48:02.079562', 'step': 21979, 'epoch': 3} {'type': 'loss', 'content': 0.17733408510684967, 'timestamp': '2025-10-01 04:48:02.085400', 'step': 21980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:48:02.138580', 'step': 21980, 'epoch': 3} {'type': 'loss', 'content': 0.08949390798807144, 'timestamp': '2025-10-01 04:48:02.140801', 'step': 21981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:02.193988', 'step': 21981, 'epoch': 3} {'type': 'loss', 'content': 0.17213194072246552, 'timestamp': '2025-10-01 04:48:02.196171', 'step': 21982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:02.249066', 'step': 21982, 'epoch': 3} {'type': 'loss', 'content': 0.14493784308433533, 'timestamp': '2025-10-01 04:48:02.251445', 'step': 21983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:48:02.304555', 'step': 21983, 'epoch': 3} {'type': 'loss', 'content': 0.12083635479211807, 'timestamp': '2025-10-01 04:48:02.310238', 'step': 21984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:02.363645', 'step': 21984, 'epoch': 3} {'type': 'loss', 'content': 0.06805749237537384, 'timestamp': '2025-10-01 04:48:02.365779', 'step': 21985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:02.419142', 'step': 21985, 'epoch': 3} {'type': 'loss', 'content': 0.13427439332008362, 'timestamp': '2025-10-01 04:48:02.421374', 'step': 21986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:48:02.474869', 'step': 21986, 'epoch': 3} {'type': 'loss', 'content': 0.15965187549591064, 'timestamp': '2025-10-01 04:48:02.477150', 'step': 21987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:02.530267', 'step': 21987, 'epoch': 3} {'type': 'loss', 'content': 0.03192080184817314, 'timestamp': '2025-10-01 04:48:02.536117', 'step': 21988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:02.588809', 'step': 21988, 'epoch': 3} {'type': 'loss', 'content': 0.07021528482437134, 'timestamp': '2025-10-01 04:48:02.591196', 'step': 21989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:02.644396', 'step': 21989, 'epoch': 3} {'type': 'loss', 'content': 0.06756633520126343, 'timestamp': '2025-10-01 04:48:02.646457', 'step': 21990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:02.699582', 'step': 21990, 'epoch': 3} {'type': 'loss', 'content': 0.07434463500976562, 'timestamp': '2025-10-01 04:48:02.701701', 'step': 21991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:02.754698', 'step': 21991, 'epoch': 3} {'type': 'loss', 'content': 0.07539612799882889, 'timestamp': '2025-10-01 04:48:02.761800', 'step': 21992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:02.814181', 'step': 21992, 'epoch': 3} {'type': 'loss', 'content': 0.0456652007997036, 'timestamp': '2025-10-01 04:48:02.816329', 'step': 21993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:02.869252', 'step': 21993, 'epoch': 3} {'type': 'loss', 'content': 0.10941071063280106, 'timestamp': '2025-10-01 04:48:02.871452', 'step': 21994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:02.924629', 'step': 21994, 'epoch': 3} {'type': 'loss', 'content': 0.06302378326654434, 'timestamp': '2025-10-01 04:48:02.926797', 'step': 21995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:02.980166', 'step': 21995, 'epoch': 3} {'type': 'loss', 'content': 0.07546138018369675, 'timestamp': '2025-10-01 04:48:02.985900', 'step': 21996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:03.038480', 'step': 21996, 'epoch': 3} {'type': 'loss', 'content': 0.05846978351473808, 'timestamp': '2025-10-01 04:48:03.040685', 'step': 21997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:48:03.093995', 'step': 21997, 'epoch': 3} {'type': 'loss', 'content': 0.11995601654052734, 'timestamp': '2025-10-01 04:48:03.096095', 'step': 21998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:03.149367', 'step': 21998, 'epoch': 3} {'type': 'loss', 'content': 0.05123480036854744, 'timestamp': '2025-10-01 04:48:03.151491', 'step': 21999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:03.204136', 'step': 21999, 'epoch': 3} {'type': 'loss', 'content': 0.10535784810781479, 'timestamp': '2025-10-01 04:48:03.209823', 'step': 22000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 22000', 'timestamp': '2025-10-01 04:48:03.591388', 'step': 22000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:03.647741', 'step': 22000, 'epoch': 3} {'type': 'loss', 'content': 0.15807929635047913, 'timestamp': '2025-10-01 04:48:03.650452', 'step': 22001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:03.707006', 'step': 22001, 'epoch': 3} {'type': 'loss', 'content': 0.057387106120586395, 'timestamp': '2025-10-01 04:48:03.709513', 'step': 22002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:03.764389', 'step': 22002, 'epoch': 3} {'type': 'loss', 'content': 0.08014818280935287, 'timestamp': '2025-10-01 04:48:03.766582', 'step': 22003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:03.819782', 'step': 22003, 'epoch': 3} {'type': 'loss', 'content': 0.09712222963571548, 'timestamp': '2025-10-01 04:48:03.825946', 'step': 22004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:03.882818', 'step': 22004, 'epoch': 3} {'type': 'loss', 'content': 0.14295615255832672, 'timestamp': '2025-10-01 04:48:03.885043', 'step': 22005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:03.938166', 'step': 22005, 'epoch': 3} {'type': 'loss', 'content': 0.09050197154283524, 'timestamp': '2025-10-01 04:48:03.940429', 'step': 22006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:03.995753', 'step': 22006, 'epoch': 3} {'type': 'loss', 'content': 0.10968814045190811, 'timestamp': '2025-10-01 04:48:03.998081', 'step': 22007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:04.058400', 'step': 22007, 'epoch': 3} {'type': 'loss', 'content': 0.11387898027896881, 'timestamp': '2025-10-01 04:48:04.064642', 'step': 22008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:04.117245', 'step': 22008, 'epoch': 3} {'type': 'loss', 'content': 0.05060264840722084, 'timestamp': '2025-10-01 04:48:04.119376', 'step': 22009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:04.172823', 'step': 22009, 'epoch': 3} {'type': 'loss', 'content': 0.007126297801733017, 'timestamp': '2025-10-01 04:48:04.174932', 'step': 22010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:04.228136', 'step': 22010, 'epoch': 3} {'type': 'loss', 'content': 0.07437947392463684, 'timestamp': '2025-10-01 04:48:04.230289', 'step': 22011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:04.283339', 'step': 22011, 'epoch': 3} {'type': 'loss', 'content': 0.03325599431991577, 'timestamp': '2025-10-01 04:48:04.289056', 'step': 22012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:48:04.341620', 'step': 22012, 'epoch': 3} {'type': 'loss', 'content': 0.06461802124977112, 'timestamp': '2025-10-01 04:48:04.343789', 'step': 22013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:04.397195', 'step': 22013, 'epoch': 3} {'type': 'loss', 'content': 0.03536621108651161, 'timestamp': '2025-10-01 04:48:04.399371', 'step': 22014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:04.452809', 'step': 22014, 'epoch': 3} {'type': 'loss', 'content': 0.17047587037086487, 'timestamp': '2025-10-01 04:48:04.455083', 'step': 22015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:48:04.508162', 'step': 22015, 'epoch': 3} {'type': 'loss', 'content': 0.1043611690402031, 'timestamp': '2025-10-01 04:48:04.514017', 'step': 22016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:04.566789', 'step': 22016, 'epoch': 3} {'type': 'loss', 'content': 0.06657668948173523, 'timestamp': '2025-10-01 04:48:04.568688', 'step': 22017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:04.621451', 'step': 22017, 'epoch': 3} {'type': 'loss', 'content': 0.08563453704118729, 'timestamp': '2025-10-01 04:48:04.623609', 'step': 22018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:04.677365', 'step': 22018, 'epoch': 3} {'type': 'loss', 'content': 0.041754454374313354, 'timestamp': '2025-10-01 04:48:04.679533', 'step': 22019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:04.733698', 'step': 22019, 'epoch': 3} {'type': 'loss', 'content': 0.09940749406814575, 'timestamp': '2025-10-01 04:48:04.739353', 'step': 22020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:04.791849', 'step': 22020, 'epoch': 3} {'type': 'loss', 'content': 0.12380655109882355, 'timestamp': '2025-10-01 04:48:04.793945', 'step': 22021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:04.846859', 'step': 22021, 'epoch': 3} {'type': 'loss', 'content': 0.14363454282283783, 'timestamp': '2025-10-01 04:48:04.849206', 'step': 22022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:04.902320', 'step': 22022, 'epoch': 3} {'type': 'loss', 'content': 0.07782530039548874, 'timestamp': '2025-10-01 04:48:04.904557', 'step': 22023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:48:04.957451', 'step': 22023, 'epoch': 3} {'type': 'loss', 'content': 0.18890361487865448, 'timestamp': '2025-10-01 04:48:04.963274', 'step': 22024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:05.016392', 'step': 22024, 'epoch': 3} {'type': 'loss', 'content': 0.062075890600681305, 'timestamp': '2025-10-01 04:48:05.018589', 'step': 22025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:05.071474', 'step': 22025, 'epoch': 3} {'type': 'loss', 'content': 0.12579749524593353, 'timestamp': '2025-10-01 04:48:05.073960', 'step': 22026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:05.138058', 'step': 22026, 'epoch': 3} {'type': 'loss', 'content': 0.028133215382695198, 'timestamp': '2025-10-01 04:48:05.140210', 'step': 22027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:48:05.193809', 'step': 22027, 'epoch': 3} {'type': 'loss', 'content': 0.05137161538004875, 'timestamp': '2025-10-01 04:48:05.199517', 'step': 22028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:05.255256', 'step': 22028, 'epoch': 3} {'type': 'loss', 'content': 0.025943294167518616, 'timestamp': '2025-10-01 04:48:05.257386', 'step': 22029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:48:05.310845', 'step': 22029, 'epoch': 3} {'type': 'loss', 'content': 0.06884274631738663, 'timestamp': '2025-10-01 04:48:05.313316', 'step': 22030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:48:05.371212', 'step': 22030, 'epoch': 3} {'type': 'loss', 'content': 0.11781101673841476, 'timestamp': '2025-10-01 04:48:05.373474', 'step': 22031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:05.429544', 'step': 22031, 'epoch': 3} {'type': 'loss', 'content': 0.15177537500858307, 'timestamp': '2025-10-01 04:48:05.435551', 'step': 22032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:05.489503', 'step': 22032, 'epoch': 3} {'type': 'loss', 'content': 0.10296163707971573, 'timestamp': '2025-10-01 04:48:05.491826', 'step': 22033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:05.544864', 'step': 22033, 'epoch': 3} {'type': 'loss', 'content': 0.04244309291243553, 'timestamp': '2025-10-01 04:48:05.547037', 'step': 22034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:05.601301', 'step': 22034, 'epoch': 3} {'type': 'loss', 'content': 0.11230259388685226, 'timestamp': '2025-10-01 04:48:05.603457', 'step': 22035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:05.657266', 'step': 22035, 'epoch': 3} {'type': 'loss', 'content': 0.03846536576747894, 'timestamp': '2025-10-01 04:48:05.662883', 'step': 22036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:05.715826', 'step': 22036, 'epoch': 3} {'type': 'loss', 'content': 0.10292262583971024, 'timestamp': '2025-10-01 04:48:05.718105', 'step': 22037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:05.771456', 'step': 22037, 'epoch': 3} {'type': 'loss', 'content': 0.15595094859600067, 'timestamp': '2025-10-01 04:48:05.774698', 'step': 22038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:05.832286', 'step': 22038, 'epoch': 3} {'type': 'loss', 'content': 0.029802925884723663, 'timestamp': '2025-10-01 04:48:05.834412', 'step': 22039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:05.887377', 'step': 22039, 'epoch': 3} {'type': 'loss', 'content': 0.06542687863111496, 'timestamp': '2025-10-01 04:48:05.893560', 'step': 22040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:05.947084', 'step': 22040, 'epoch': 3} {'type': 'loss', 'content': 0.1096319630742073, 'timestamp': '2025-10-01 04:48:05.949155', 'step': 22041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:06.002347', 'step': 22041, 'epoch': 3} {'type': 'loss', 'content': 0.08019313961267471, 'timestamp': '2025-10-01 04:48:06.004581', 'step': 22042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:06.058324', 'step': 22042, 'epoch': 3} {'type': 'loss', 'content': 0.04134045168757439, 'timestamp': '2025-10-01 04:48:06.060506', 'step': 22043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:06.114814', 'step': 22043, 'epoch': 3} {'type': 'loss', 'content': 0.0638342797756195, 'timestamp': '2025-10-01 04:48:06.120676', 'step': 22044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:06.173553', 'step': 22044, 'epoch': 3} {'type': 'loss', 'content': 0.05934511870145798, 'timestamp': '2025-10-01 04:48:06.175948', 'step': 22045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:06.229085', 'step': 22045, 'epoch': 3} {'type': 'loss', 'content': 0.051186516880989075, 'timestamp': '2025-10-01 04:48:06.231261', 'step': 22046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:06.284718', 'step': 22046, 'epoch': 3} {'type': 'loss', 'content': 0.079083651304245, 'timestamp': '2025-10-01 04:48:06.286875', 'step': 22047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:06.339850', 'step': 22047, 'epoch': 3} {'type': 'loss', 'content': 0.1254751831293106, 'timestamp': '2025-10-01 04:48:06.345553', 'step': 22048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:48:06.398683', 'step': 22048, 'epoch': 3} {'type': 'loss', 'content': 0.05666473135352135, 'timestamp': '2025-10-01 04:48:06.401311', 'step': 22049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:06.455371', 'step': 22049, 'epoch': 3} {'type': 'loss', 'content': 0.10589117556810379, 'timestamp': '2025-10-01 04:48:06.458064', 'step': 22050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:06.511215', 'step': 22050, 'epoch': 3} {'type': 'loss', 'content': 0.07197887450456619, 'timestamp': '2025-10-01 04:48:06.513408', 'step': 22051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:06.570191', 'step': 22051, 'epoch': 3} {'type': 'loss', 'content': 0.06076744943857193, 'timestamp': '2025-10-01 04:48:06.576219', 'step': 22052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:06.629202', 'step': 22052, 'epoch': 3} {'type': 'loss', 'content': 0.08638401329517365, 'timestamp': '2025-10-01 04:48:06.631475', 'step': 22053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:06.684691', 'step': 22053, 'epoch': 3} {'type': 'loss', 'content': 0.06143632531166077, 'timestamp': '2025-10-01 04:48:06.686952', 'step': 22054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:06.740685', 'step': 22054, 'epoch': 3} {'type': 'loss', 'content': 0.08951087296009064, 'timestamp': '2025-10-01 04:48:06.742797', 'step': 22055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:06.796347', 'step': 22055, 'epoch': 3} {'type': 'loss', 'content': 0.07531844824552536, 'timestamp': '2025-10-01 04:48:06.802236', 'step': 22056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:06.855693', 'step': 22056, 'epoch': 3} {'type': 'loss', 'content': 0.048676054924726486, 'timestamp': '2025-10-01 04:48:06.857890', 'step': 22057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:48:06.912242', 'step': 22057, 'epoch': 3} {'type': 'loss', 'content': 0.03661957010626793, 'timestamp': '2025-10-01 04:48:06.914681', 'step': 22058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:48:06.968946', 'step': 22058, 'epoch': 3} {'type': 'loss', 'content': 0.04196489229798317, 'timestamp': '2025-10-01 04:48:06.971181', 'step': 22059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:07.024888', 'step': 22059, 'epoch': 3} {'type': 'loss', 'content': 0.051364921033382416, 'timestamp': '2025-10-01 04:48:07.030742', 'step': 22060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:07.084393', 'step': 22060, 'epoch': 3} {'type': 'loss', 'content': 0.057215362787246704, 'timestamp': '2025-10-01 04:48:07.086547', 'step': 22061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:07.139532', 'step': 22061, 'epoch': 3} {'type': 'loss', 'content': 0.02401992492377758, 'timestamp': '2025-10-01 04:48:07.141863', 'step': 22062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:07.195115', 'step': 22062, 'epoch': 3} {'type': 'loss', 'content': 0.10325601696968079, 'timestamp': '2025-10-01 04:48:07.197143', 'step': 22063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:07.250739', 'step': 22063, 'epoch': 3} {'type': 'loss', 'content': 0.1480506956577301, 'timestamp': '2025-10-01 04:48:07.258305', 'step': 22064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:07.312540', 'step': 22064, 'epoch': 3} {'type': 'loss', 'content': 0.01893778145313263, 'timestamp': '2025-10-01 04:48:07.314923', 'step': 22065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:48:07.369326', 'step': 22065, 'epoch': 3} {'type': 'loss', 'content': 0.09368278086185455, 'timestamp': '2025-10-01 04:48:07.371500', 'step': 22066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:48:07.425091', 'step': 22066, 'epoch': 3} {'type': 'loss', 'content': 0.0548403263092041, 'timestamp': '2025-10-01 04:48:07.427313', 'step': 22067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:07.481037', 'step': 22067, 'epoch': 3} {'type': 'loss', 'content': 0.05574386939406395, 'timestamp': '2025-10-01 04:48:07.486704', 'step': 22068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:48:07.539568', 'step': 22068, 'epoch': 3} {'type': 'loss', 'content': 0.11422182619571686, 'timestamp': '2025-10-01 04:48:07.541655', 'step': 22069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:07.594997', 'step': 22069, 'epoch': 3} {'type': 'loss', 'content': 0.08436903357505798, 'timestamp': '2025-10-01 04:48:07.597211', 'step': 22070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:07.650701', 'step': 22070, 'epoch': 3} {'type': 'loss', 'content': 0.05922471359372139, 'timestamp': '2025-10-01 04:48:07.652999', 'step': 22071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:07.706911', 'step': 22071, 'epoch': 3} {'type': 'loss', 'content': 0.08922547847032547, 'timestamp': '2025-10-01 04:48:07.712901', 'step': 22072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:07.766124', 'step': 22072, 'epoch': 3} {'type': 'loss', 'content': 0.06354352086782455, 'timestamp': '2025-10-01 04:48:07.768546', 'step': 22073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:48:07.829985', 'step': 22073, 'epoch': 3} {'type': 'loss', 'content': 0.0514272041618824, 'timestamp': '2025-10-01 04:48:07.832344', 'step': 22074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:48:07.885530', 'step': 22074, 'epoch': 3} {'type': 'loss', 'content': 0.05606374517083168, 'timestamp': '2025-10-01 04:48:07.887827', 'step': 22075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:07.941687', 'step': 22075, 'epoch': 3} {'type': 'loss', 'content': 0.04728482663631439, 'timestamp': '2025-10-01 04:48:07.947420', 'step': 22076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:08.000703', 'step': 22076, 'epoch': 3} {'type': 'loss', 'content': 0.04387512430548668, 'timestamp': '2025-10-01 04:48:08.002982', 'step': 22077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:48:08.056824', 'step': 22077, 'epoch': 3} {'type': 'loss', 'content': 0.08270258456468582, 'timestamp': '2025-10-01 04:48:08.060071', 'step': 22078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:48:08.113544', 'step': 22078, 'epoch': 3} {'type': 'loss', 'content': 0.15110710263252258, 'timestamp': '2025-10-01 04:48:08.115739', 'step': 22079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:08.170589', 'step': 22079, 'epoch': 3} {'type': 'loss', 'content': 0.027249043807387352, 'timestamp': '2025-10-01 04:48:08.176839', 'step': 22080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:08.229963', 'step': 22080, 'epoch': 3} {'type': 'loss', 'content': 0.1752813756465912, 'timestamp': '2025-10-01 04:48:08.232302', 'step': 22081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:08.285562', 'step': 22081, 'epoch': 3} {'type': 'loss', 'content': 0.09822169691324234, 'timestamp': '2025-10-01 04:48:08.287729', 'step': 22082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:08.341505', 'step': 22082, 'epoch': 3} {'type': 'loss', 'content': 0.10759181529283524, 'timestamp': '2025-10-01 04:48:08.346065', 'step': 22083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:08.404360', 'step': 22083, 'epoch': 3} {'type': 'loss', 'content': 0.07499042898416519, 'timestamp': '2025-10-01 04:48:08.411986', 'step': 22084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:08.464830', 'step': 22084, 'epoch': 3} {'type': 'loss', 'content': 0.0958203598856926, 'timestamp': '2025-10-01 04:48:08.467797', 'step': 22085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:08.535204', 'step': 22085, 'epoch': 3} {'type': 'loss', 'content': 0.08389827609062195, 'timestamp': '2025-10-01 04:48:08.537296', 'step': 22086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:08.594817', 'step': 22086, 'epoch': 3} {'type': 'loss', 'content': 0.1797996163368225, 'timestamp': '2025-10-01 04:48:08.597093', 'step': 22087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:48:08.651163', 'step': 22087, 'epoch': 3} {'type': 'loss', 'content': 0.056791823357343674, 'timestamp': '2025-10-01 04:48:08.656758', 'step': 22088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:48:08.709674', 'step': 22088, 'epoch': 3} {'type': 'loss', 'content': 0.10893932729959488, 'timestamp': '2025-10-01 04:48:08.711749', 'step': 22089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:48:08.766420', 'step': 22089, 'epoch': 3} {'type': 'loss', 'content': 0.044958584010601044, 'timestamp': '2025-10-01 04:48:08.768944', 'step': 22090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:08.822371', 'step': 22090, 'epoch': 3} {'type': 'loss', 'content': 0.09220685064792633, 'timestamp': '2025-10-01 04:48:08.824667', 'step': 22091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:08.883310', 'step': 22091, 'epoch': 3} {'type': 'loss', 'content': 0.07545219361782074, 'timestamp': '2025-10-01 04:48:08.889343', 'step': 22092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:08.943371', 'step': 22092, 'epoch': 3} {'type': 'loss', 'content': 0.050760529935359955, 'timestamp': '2025-10-01 04:48:08.945983', 'step': 22093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:09.000407', 'step': 22093, 'epoch': 3} {'type': 'loss', 'content': 0.08803217858076096, 'timestamp': '2025-10-01 04:48:09.002810', 'step': 22094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:09.057258', 'step': 22094, 'epoch': 3} {'type': 'loss', 'content': 0.04970000311732292, 'timestamp': '2025-10-01 04:48:09.059781', 'step': 22095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:09.113998', 'step': 22095, 'epoch': 3} {'type': 'loss', 'content': 0.0746975988149643, 'timestamp': '2025-10-01 04:48:09.119934', 'step': 22096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:09.173875', 'step': 22096, 'epoch': 3} {'type': 'loss', 'content': 0.1309729367494583, 'timestamp': '2025-10-01 04:48:09.176254', 'step': 22097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:09.229820', 'step': 22097, 'epoch': 3} {'type': 'loss', 'content': 0.06553548574447632, 'timestamp': '2025-10-01 04:48:09.232254', 'step': 22098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:09.286698', 'step': 22098, 'epoch': 3} {'type': 'loss', 'content': 0.03356695920228958, 'timestamp': '2025-10-01 04:48:09.289236', 'step': 22099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:09.342867', 'step': 22099, 'epoch': 3} {'type': 'loss', 'content': 0.09861999750137329, 'timestamp': '2025-10-01 04:48:09.349220', 'step': 22100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:09.406114', 'step': 22100, 'epoch': 3} {'type': 'loss', 'content': 0.14540192484855652, 'timestamp': '2025-10-01 04:48:09.408798', 'step': 22101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:09.464014', 'step': 22101, 'epoch': 3} {'type': 'loss', 'content': 0.09104031324386597, 'timestamp': '2025-10-01 04:48:09.470413', 'step': 22102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:09.524816', 'step': 22102, 'epoch': 3} {'type': 'loss', 'content': 0.05274764448404312, 'timestamp': '2025-10-01 04:48:09.527010', 'step': 22103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:48:09.580993', 'step': 22103, 'epoch': 3} {'type': 'loss', 'content': 0.12979097664356232, 'timestamp': '2025-10-01 04:48:09.587316', 'step': 22104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:09.640116', 'step': 22104, 'epoch': 3} {'type': 'loss', 'content': 0.14938247203826904, 'timestamp': '2025-10-01 04:48:09.642369', 'step': 22105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:09.696305', 'step': 22105, 'epoch': 3} {'type': 'loss', 'content': 0.05399193614721298, 'timestamp': '2025-10-01 04:48:09.698659', 'step': 22106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:09.752245', 'step': 22106, 'epoch': 3} {'type': 'loss', 'content': 0.11398568749427795, 'timestamp': '2025-10-01 04:48:09.754736', 'step': 22107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:09.808845', 'step': 22107, 'epoch': 3} {'type': 'loss', 'content': 0.09252525866031647, 'timestamp': '2025-10-01 04:48:09.828786', 'step': 22108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:09.882996', 'step': 22108, 'epoch': 3} {'type': 'loss', 'content': 0.09217435121536255, 'timestamp': '2025-10-01 04:48:09.885427', 'step': 22109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:09.939791', 'step': 22109, 'epoch': 3} {'type': 'loss', 'content': 0.058905430138111115, 'timestamp': '2025-10-01 04:48:09.942153', 'step': 22110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:09.996005', 'step': 22110, 'epoch': 3} {'type': 'loss', 'content': 0.11013643443584442, 'timestamp': '2025-10-01 04:48:09.998787', 'step': 22111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:10.052697', 'step': 22111, 'epoch': 3} {'type': 'loss', 'content': 0.09754873812198639, 'timestamp': '2025-10-01 04:48:10.058713', 'step': 22112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:48:10.111915', 'step': 22112, 'epoch': 3} {'type': 'loss', 'content': 0.1614135503768921, 'timestamp': '2025-10-01 04:48:10.114034', 'step': 22113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:10.167657', 'step': 22113, 'epoch': 3} {'type': 'loss', 'content': 0.08775290846824646, 'timestamp': '2025-10-01 04:48:10.170216', 'step': 22114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:10.228695', 'step': 22114, 'epoch': 3} {'type': 'loss', 'content': 0.12330786138772964, 'timestamp': '2025-10-01 04:48:10.233709', 'step': 22115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:48:10.288213', 'step': 22115, 'epoch': 3} {'type': 'loss', 'content': 0.09271492063999176, 'timestamp': '2025-10-01 04:48:10.294126', 'step': 22116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:10.347033', 'step': 22116, 'epoch': 3} {'type': 'loss', 'content': 0.1342150866985321, 'timestamp': '2025-10-01 04:48:10.349150', 'step': 22117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:10.401996', 'step': 22117, 'epoch': 3} {'type': 'loss', 'content': 0.0360652394592762, 'timestamp': '2025-10-01 04:48:10.404159', 'step': 22118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:48:10.458388', 'step': 22118, 'epoch': 3} {'type': 'loss', 'content': 0.06902971863746643, 'timestamp': '2025-10-01 04:48:10.460478', 'step': 22119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:10.513402', 'step': 22119, 'epoch': 3} {'type': 'loss', 'content': 0.10476882010698318, 'timestamp': '2025-10-01 04:48:10.519399', 'step': 22120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:10.572484', 'step': 22120, 'epoch': 3} {'type': 'loss', 'content': 0.04762980341911316, 'timestamp': '2025-10-01 04:48:10.574631', 'step': 22121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:48:10.628152', 'step': 22121, 'epoch': 3} {'type': 'loss', 'content': 0.0841197669506073, 'timestamp': '2025-10-01 04:48:10.630319', 'step': 22122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:10.683735', 'step': 22122, 'epoch': 3} {'type': 'loss', 'content': 0.16721460223197937, 'timestamp': '2025-10-01 04:48:10.685881', 'step': 22123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:10.739370', 'step': 22123, 'epoch': 3} {'type': 'loss', 'content': 0.08576850593090057, 'timestamp': '2025-10-01 04:48:10.745087', 'step': 22124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:10.797935', 'step': 22124, 'epoch': 3} {'type': 'loss', 'content': 0.08350574970245361, 'timestamp': '2025-10-01 04:48:10.800066', 'step': 22125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:10.853142', 'step': 22125, 'epoch': 3} {'type': 'loss', 'content': 0.0932609960436821, 'timestamp': '2025-10-01 04:48:10.855485', 'step': 22126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:10.909052', 'step': 22126, 'epoch': 3} {'type': 'loss', 'content': 0.17331171035766602, 'timestamp': '2025-10-01 04:48:10.911211', 'step': 22127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:10.964446', 'step': 22127, 'epoch': 3} {'type': 'loss', 'content': 0.10097646713256836, 'timestamp': '2025-10-01 04:48:10.970192', 'step': 22128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:48:11.023199', 'step': 22128, 'epoch': 3} {'type': 'loss', 'content': 0.0597120001912117, 'timestamp': '2025-10-01 04:48:11.025802', 'step': 22129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:48:11.080416', 'step': 22129, 'epoch': 3} {'type': 'loss', 'content': 0.07663996517658234, 'timestamp': '2025-10-01 04:48:11.082715', 'step': 22130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:11.136819', 'step': 22130, 'epoch': 3} {'type': 'loss', 'content': 0.06191512942314148, 'timestamp': '2025-10-01 04:48:11.139124', 'step': 22131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:11.192428', 'step': 22131, 'epoch': 3} {'type': 'loss', 'content': 0.06943721324205399, 'timestamp': '2025-10-01 04:48:11.198074', 'step': 22132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:11.251378', 'step': 22132, 'epoch': 3} {'type': 'loss', 'content': 0.07869606465101242, 'timestamp': '2025-10-01 04:48:11.253288', 'step': 22133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:11.307326', 'step': 22133, 'epoch': 3} {'type': 'loss', 'content': 0.11408895254135132, 'timestamp': '2025-10-01 04:48:11.309550', 'step': 22134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:11.364584', 'step': 22134, 'epoch': 3} {'type': 'loss', 'content': 0.06725876033306122, 'timestamp': '2025-10-01 04:48:11.366641', 'step': 22135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:11.420144', 'step': 22135, 'epoch': 3} {'type': 'loss', 'content': 0.06410381942987442, 'timestamp': '2025-10-01 04:48:11.425859', 'step': 22136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:11.478682', 'step': 22136, 'epoch': 3} {'type': 'loss', 'content': 0.0966048464179039, 'timestamp': '2025-10-01 04:48:11.480838', 'step': 22137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:11.534166', 'step': 22137, 'epoch': 3} {'type': 'loss', 'content': 0.04563155025243759, 'timestamp': '2025-10-01 04:48:11.536286', 'step': 22138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:11.589847', 'step': 22138, 'epoch': 3} {'type': 'loss', 'content': 0.055294495075941086, 'timestamp': '2025-10-01 04:48:11.592032', 'step': 22139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:48:11.645143', 'step': 22139, 'epoch': 3} {'type': 'loss', 'content': 0.09826011955738068, 'timestamp': '2025-10-01 04:48:11.650936', 'step': 22140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:11.718142', 'step': 22140, 'epoch': 3} {'type': 'loss', 'content': 0.0613427571952343, 'timestamp': '2025-10-01 04:48:11.720256', 'step': 22141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:11.778499', 'step': 22141, 'epoch': 3} {'type': 'loss', 'content': 0.07169661670923233, 'timestamp': '2025-10-01 04:48:11.782431', 'step': 22142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:11.835706', 'step': 22142, 'epoch': 3} {'type': 'loss', 'content': 0.09429685771465302, 'timestamp': '2025-10-01 04:48:11.845440', 'step': 22143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:48:11.898722', 'step': 22143, 'epoch': 3} {'type': 'loss', 'content': 0.06237674504518509, 'timestamp': '2025-10-01 04:48:11.913762', 'step': 22144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:11.966791', 'step': 22144, 'epoch': 3} {'type': 'loss', 'content': 0.12896983325481415, 'timestamp': '2025-10-01 04:48:11.969008', 'step': 22145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:12.021843', 'step': 22145, 'epoch': 3} {'type': 'loss', 'content': 0.09972986578941345, 'timestamp': '2025-10-01 04:48:12.024041', 'step': 22146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:12.077293', 'step': 22146, 'epoch': 3} {'type': 'loss', 'content': 0.06165323406457901, 'timestamp': '2025-10-01 04:48:12.079352', 'step': 22147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:12.138661', 'step': 22147, 'epoch': 3} {'type': 'loss', 'content': 0.03659765049815178, 'timestamp': '2025-10-01 04:48:12.144102', 'step': 22148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:48:12.197283', 'step': 22148, 'epoch': 3} {'type': 'loss', 'content': 0.06308145076036453, 'timestamp': '2025-10-01 04:48:12.199461', 'step': 22149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:12.253258', 'step': 22149, 'epoch': 3} {'type': 'loss', 'content': 0.062449805438518524, 'timestamp': '2025-10-01 04:48:12.255407', 'step': 22150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:12.310551', 'step': 22150, 'epoch': 3} {'type': 'loss', 'content': 0.08353875577449799, 'timestamp': '2025-10-01 04:48:12.312686', 'step': 22151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:12.367040', 'step': 22151, 'epoch': 3} {'type': 'loss', 'content': 0.024703580886125565, 'timestamp': '2025-10-01 04:48:12.372892', 'step': 22152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:12.427153', 'step': 22152, 'epoch': 3} {'type': 'loss', 'content': 0.05159715190529823, 'timestamp': '2025-10-01 04:48:12.429804', 'step': 22153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:12.483278', 'step': 22153, 'epoch': 3} {'type': 'loss', 'content': 0.04159795492887497, 'timestamp': '2025-10-01 04:48:12.485434', 'step': 22154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:12.538861', 'step': 22154, 'epoch': 3} {'type': 'loss', 'content': 0.012312970124185085, 'timestamp': '2025-10-01 04:48:12.541135', 'step': 22155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:12.594647', 'step': 22155, 'epoch': 3} {'type': 'loss', 'content': 0.05214354768395424, 'timestamp': '2025-10-01 04:48:12.600314', 'step': 22156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:12.654327', 'step': 22156, 'epoch': 3} {'type': 'loss', 'content': 0.06519560515880585, 'timestamp': '2025-10-01 04:48:12.656407', 'step': 22157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:12.717600', 'step': 22157, 'epoch': 3} {'type': 'loss', 'content': 0.029753632843494415, 'timestamp': '2025-10-01 04:48:12.719888', 'step': 22158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:12.774785', 'step': 22158, 'epoch': 3} {'type': 'loss', 'content': 0.11605379730463028, 'timestamp': '2025-10-01 04:48:12.777507', 'step': 22159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:48:12.832223', 'step': 22159, 'epoch': 3} {'type': 'loss', 'content': 0.1356789767742157, 'timestamp': '2025-10-01 04:48:12.838630', 'step': 22160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:48:12.892613', 'step': 22160, 'epoch': 3} {'type': 'loss', 'content': 0.06811936944723129, 'timestamp': '2025-10-01 04:48:12.894776', 'step': 22161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:12.954499', 'step': 22161, 'epoch': 3} {'type': 'loss', 'content': 0.02903752028942108, 'timestamp': '2025-10-01 04:48:12.956823', 'step': 22162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:13.011279', 'step': 22162, 'epoch': 3} {'type': 'loss', 'content': 0.16258977353572845, 'timestamp': '2025-10-01 04:48:13.013399', 'step': 22163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:13.067742', 'step': 22163, 'epoch': 3} {'type': 'loss', 'content': 0.04415521025657654, 'timestamp': '2025-10-01 04:48:13.074536', 'step': 22164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:13.128473', 'step': 22164, 'epoch': 3} {'type': 'loss', 'content': 0.10745842009782791, 'timestamp': '2025-10-01 04:48:13.130634', 'step': 22165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:13.185933', 'step': 22165, 'epoch': 3} {'type': 'loss', 'content': 0.10052496939897537, 'timestamp': '2025-10-01 04:48:13.188055', 'step': 22166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:13.249597', 'step': 22166, 'epoch': 3} {'type': 'loss', 'content': 0.02091854065656662, 'timestamp': '2025-10-01 04:48:13.251777', 'step': 22167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:13.305581', 'step': 22167, 'epoch': 3} {'type': 'loss', 'content': 0.1218675971031189, 'timestamp': '2025-10-01 04:48:13.313087', 'step': 22168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:48:13.368544', 'step': 22168, 'epoch': 3} {'type': 'loss', 'content': 0.08546233177185059, 'timestamp': '2025-10-01 04:48:13.370767', 'step': 22169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:13.424097', 'step': 22169, 'epoch': 3} {'type': 'loss', 'content': 0.09975787252187729, 'timestamp': '2025-10-01 04:48:13.427196', 'step': 22170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:13.483969', 'step': 22170, 'epoch': 3} {'type': 'loss', 'content': 0.050702616572380066, 'timestamp': '2025-10-01 04:48:13.486677', 'step': 22171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:13.540169', 'step': 22171, 'epoch': 3} {'type': 'loss', 'content': 0.08401709049940109, 'timestamp': '2025-10-01 04:48:13.546357', 'step': 22172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:13.599745', 'step': 22172, 'epoch': 3} {'type': 'loss', 'content': 0.08699262142181396, 'timestamp': '2025-10-01 04:48:13.601982', 'step': 22173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:48:13.661669', 'step': 22173, 'epoch': 3} {'type': 'loss', 'content': 0.10765735059976578, 'timestamp': '2025-10-01 04:48:13.664839', 'step': 22174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:13.719209', 'step': 22174, 'epoch': 3} {'type': 'loss', 'content': 0.06085461005568504, 'timestamp': '2025-10-01 04:48:13.721142', 'step': 22175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:13.777626', 'step': 22175, 'epoch': 3} {'type': 'loss', 'content': 0.10218542814254761, 'timestamp': '2025-10-01 04:48:13.787489', 'step': 22176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:13.841453', 'step': 22176, 'epoch': 3} {'type': 'loss', 'content': 0.04904799908399582, 'timestamp': '2025-10-01 04:48:13.843837', 'step': 22177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:13.897553', 'step': 22177, 'epoch': 3} {'type': 'loss', 'content': 0.06310104578733444, 'timestamp': '2025-10-01 04:48:13.899726', 'step': 22178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:13.954427', 'step': 22178, 'epoch': 3} {'type': 'loss', 'content': 0.15859633684158325, 'timestamp': '2025-10-01 04:48:13.956708', 'step': 22179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:14.011784', 'step': 22179, 'epoch': 3} {'type': 'loss', 'content': 0.11510144174098969, 'timestamp': '2025-10-01 04:48:14.017673', 'step': 22180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:14.070841', 'step': 22180, 'epoch': 3} {'type': 'loss', 'content': 0.13117185235023499, 'timestamp': '2025-10-01 04:48:14.072933', 'step': 22181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:14.125854', 'step': 22181, 'epoch': 3} {'type': 'loss', 'content': 0.056522853672504425, 'timestamp': '2025-10-01 04:48:14.127958', 'step': 22182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:14.181921', 'step': 22182, 'epoch': 3} {'type': 'loss', 'content': 0.0904315635561943, 'timestamp': '2025-10-01 04:48:14.184064', 'step': 22183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:14.238882', 'step': 22183, 'epoch': 3} {'type': 'loss', 'content': 0.036858752369880676, 'timestamp': '2025-10-01 04:48:14.245270', 'step': 22184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:14.298643', 'step': 22184, 'epoch': 3} {'type': 'loss', 'content': 0.1425340473651886, 'timestamp': '2025-10-01 04:48:14.300947', 'step': 22185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:14.354850', 'step': 22185, 'epoch': 3} {'type': 'loss', 'content': 0.09502019733190536, 'timestamp': '2025-10-01 04:48:14.356991', 'step': 22186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:14.410607', 'step': 22186, 'epoch': 3} {'type': 'loss', 'content': 0.0323544517159462, 'timestamp': '2025-10-01 04:48:14.412858', 'step': 22187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:14.466288', 'step': 22187, 'epoch': 3} {'type': 'loss', 'content': 0.0928749367594719, 'timestamp': '2025-10-01 04:48:14.472480', 'step': 22188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:14.525453', 'step': 22188, 'epoch': 3} {'type': 'loss', 'content': 0.08448141813278198, 'timestamp': '2025-10-01 04:48:14.527790', 'step': 22189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:14.581638', 'step': 22189, 'epoch': 3} {'type': 'loss', 'content': 0.058391932398080826, 'timestamp': '2025-10-01 04:48:14.584128', 'step': 22190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:14.638374', 'step': 22190, 'epoch': 3} {'type': 'loss', 'content': 0.1140032559633255, 'timestamp': '2025-10-01 04:48:14.640763', 'step': 22191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:48:14.694627', 'step': 22191, 'epoch': 3} {'type': 'loss', 'content': 0.08100594580173492, 'timestamp': '2025-10-01 04:48:14.702887', 'step': 22192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:14.757236', 'step': 22192, 'epoch': 3} {'type': 'loss', 'content': 0.13466669619083405, 'timestamp': '2025-10-01 04:48:14.759680', 'step': 22193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:14.812685', 'step': 22193, 'epoch': 3} {'type': 'loss', 'content': 0.06728635728359222, 'timestamp': '2025-10-01 04:48:14.819123', 'step': 22194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:14.873087', 'step': 22194, 'epoch': 3} {'type': 'loss', 'content': 0.046611238270998, 'timestamp': '2025-10-01 04:48:14.875244', 'step': 22195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:14.930308', 'step': 22195, 'epoch': 3} {'type': 'loss', 'content': 0.11233821511268616, 'timestamp': '2025-10-01 04:48:14.937104', 'step': 22196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:14.995051', 'step': 22196, 'epoch': 3} {'type': 'loss', 'content': 0.02544015645980835, 'timestamp': '2025-10-01 04:48:14.997174', 'step': 22197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:48:15.052306', 'step': 22197, 'epoch': 3} {'type': 'loss', 'content': 0.11580878496170044, 'timestamp': '2025-10-01 04:48:15.054832', 'step': 22198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:15.108805', 'step': 22198, 'epoch': 3} {'type': 'loss', 'content': 0.03443070873618126, 'timestamp': '2025-10-01 04:48:15.111369', 'step': 22199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:15.164526', 'step': 22199, 'epoch': 3} {'type': 'loss', 'content': 0.053769800812006, 'timestamp': '2025-10-01 04:48:15.170540', 'step': 22200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:15.224181', 'step': 22200, 'epoch': 3} {'type': 'loss', 'content': 0.1587936282157898, 'timestamp': '2025-10-01 04:48:15.226285', 'step': 22201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:15.289102', 'step': 22201, 'epoch': 3} {'type': 'loss', 'content': 0.042220503091812134, 'timestamp': '2025-10-01 04:48:15.291795', 'step': 22202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:15.345762', 'step': 22202, 'epoch': 3} {'type': 'loss', 'content': 0.07990164309740067, 'timestamp': '2025-10-01 04:48:15.348129', 'step': 22203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:15.401519', 'step': 22203, 'epoch': 3} {'type': 'loss', 'content': 0.0075110881589353085, 'timestamp': '2025-10-01 04:48:15.407312', 'step': 22204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:15.463564', 'step': 22204, 'epoch': 3} {'type': 'loss', 'content': 0.028334248811006546, 'timestamp': '2025-10-01 04:48:15.465832', 'step': 22205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:48:15.519409', 'step': 22205, 'epoch': 3} {'type': 'loss', 'content': 0.12481359392404556, 'timestamp': '2025-10-01 04:48:15.521932', 'step': 22206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:15.574848', 'step': 22206, 'epoch': 3} {'type': 'loss', 'content': 0.06756969541311264, 'timestamp': '2025-10-01 04:48:15.578627', 'step': 22207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:15.631896', 'step': 22207, 'epoch': 3} {'type': 'loss', 'content': 0.08325619995594025, 'timestamp': '2025-10-01 04:48:15.637639', 'step': 22208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:48:15.690858', 'step': 22208, 'epoch': 3} {'type': 'loss', 'content': 0.05202650651335716, 'timestamp': '2025-10-01 04:48:15.692951', 'step': 22209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:15.746531', 'step': 22209, 'epoch': 3} {'type': 'loss', 'content': 0.09484962373971939, 'timestamp': '2025-10-01 04:48:15.748649', 'step': 22210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:15.801680', 'step': 22210, 'epoch': 3} {'type': 'loss', 'content': 0.07441709190607071, 'timestamp': '2025-10-01 04:48:15.804197', 'step': 22211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:15.857992', 'step': 22211, 'epoch': 3} {'type': 'loss', 'content': 0.08655444532632828, 'timestamp': '2025-10-01 04:48:15.864075', 'step': 22212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:15.916583', 'step': 22212, 'epoch': 3} {'type': 'loss', 'content': 0.08312900364398956, 'timestamp': '2025-10-01 04:48:15.918629', 'step': 22213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:15.971317', 'step': 22213, 'epoch': 3} {'type': 'loss', 'content': 0.043081264942884445, 'timestamp': '2025-10-01 04:48:15.973458', 'step': 22214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:16.030455', 'step': 22214, 'epoch': 3} {'type': 'loss', 'content': 0.07759593427181244, 'timestamp': '2025-10-01 04:48:16.032541', 'step': 22215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:16.086025', 'step': 22215, 'epoch': 3} {'type': 'loss', 'content': 0.08835183829069138, 'timestamp': '2025-10-01 04:48:16.092360', 'step': 22216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:16.146324', 'step': 22216, 'epoch': 3} {'type': 'loss', 'content': 0.07281360030174255, 'timestamp': '2025-10-01 04:48:16.148833', 'step': 22217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:16.202814', 'step': 22217, 'epoch': 3} {'type': 'loss', 'content': 0.12162528187036514, 'timestamp': '2025-10-01 04:48:16.205387', 'step': 22218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:16.259843', 'step': 22218, 'epoch': 3} {'type': 'loss', 'content': 0.06345668435096741, 'timestamp': '2025-10-01 04:48:16.261953', 'step': 22219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:16.315745', 'step': 22219, 'epoch': 3} {'type': 'loss', 'content': 0.040526267141103745, 'timestamp': '2025-10-01 04:48:16.321461', 'step': 22220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:16.375026', 'step': 22220, 'epoch': 3} {'type': 'loss', 'content': 0.06820525974035263, 'timestamp': '2025-10-01 04:48:16.377425', 'step': 22221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:16.431146', 'step': 22221, 'epoch': 3} {'type': 'loss', 'content': 0.13222986459732056, 'timestamp': '2025-10-01 04:48:16.433548', 'step': 22222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:48:16.487020', 'step': 22222, 'epoch': 3} {'type': 'loss', 'content': 0.08577403426170349, 'timestamp': '2025-10-01 04:48:16.489174', 'step': 22223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:16.542322', 'step': 22223, 'epoch': 3} {'type': 'loss', 'content': 0.04909141734242439, 'timestamp': '2025-10-01 04:48:16.555191', 'step': 22224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:48:16.608429', 'step': 22224, 'epoch': 3} {'type': 'loss', 'content': 0.04375129193067551, 'timestamp': '2025-10-01 04:48:16.610607', 'step': 22225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:16.663641', 'step': 22225, 'epoch': 3} {'type': 'loss', 'content': 0.1042763888835907, 'timestamp': '2025-10-01 04:48:16.665856', 'step': 22226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:16.718936', 'step': 22226, 'epoch': 3} {'type': 'loss', 'content': 0.07190992683172226, 'timestamp': '2025-10-01 04:48:16.722268', 'step': 22227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:16.775477', 'step': 22227, 'epoch': 3} {'type': 'loss', 'content': 0.07824644446372986, 'timestamp': '2025-10-01 04:48:16.781209', 'step': 22228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:16.834333', 'step': 22228, 'epoch': 3} {'type': 'loss', 'content': 0.0826926901936531, 'timestamp': '2025-10-01 04:48:16.836429', 'step': 22229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:16.889321', 'step': 22229, 'epoch': 3} {'type': 'loss', 'content': 0.14651522040367126, 'timestamp': '2025-10-01 04:48:16.892124', 'step': 22230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:16.944951', 'step': 22230, 'epoch': 3} {'type': 'loss', 'content': 0.037469349801540375, 'timestamp': '2025-10-01 04:48:16.947524', 'step': 22231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:17.001901', 'step': 22231, 'epoch': 3} {'type': 'loss', 'content': 0.14596427977085114, 'timestamp': '2025-10-01 04:48:17.008398', 'step': 22232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:48:17.062190', 'step': 22232, 'epoch': 3} {'type': 'loss', 'content': 0.11759404838085175, 'timestamp': '2025-10-01 04:48:17.064675', 'step': 22233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:17.118361', 'step': 22233, 'epoch': 3} {'type': 'loss', 'content': 0.10322359204292297, 'timestamp': '2025-10-01 04:48:17.121415', 'step': 22234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:17.177076', 'step': 22234, 'epoch': 3} {'type': 'loss', 'content': 0.11398245394229889, 'timestamp': '2025-10-01 04:48:17.179363', 'step': 22235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:17.233751', 'step': 22235, 'epoch': 3} {'type': 'loss', 'content': 0.06783512979745865, 'timestamp': '2025-10-01 04:48:17.240010', 'step': 22236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:48:17.293912', 'step': 22236, 'epoch': 3} {'type': 'loss', 'content': 0.12597499787807465, 'timestamp': '2025-10-01 04:48:17.296113', 'step': 22237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:48:17.350784', 'step': 22237, 'epoch': 3} {'type': 'loss', 'content': 0.08165374398231506, 'timestamp': '2025-10-01 04:48:17.353524', 'step': 22238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:17.408016', 'step': 22238, 'epoch': 3} {'type': 'loss', 'content': 0.052991848438978195, 'timestamp': '2025-10-01 04:48:17.410635', 'step': 22239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:17.465648', 'step': 22239, 'epoch': 3} {'type': 'loss', 'content': 0.11639296263456345, 'timestamp': '2025-10-01 04:48:17.471756', 'step': 22240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:17.526648', 'step': 22240, 'epoch': 3} {'type': 'loss', 'content': 0.2053191065788269, 'timestamp': '2025-10-01 04:48:17.529187', 'step': 22241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:17.583713', 'step': 22241, 'epoch': 3} {'type': 'loss', 'content': 0.08300665020942688, 'timestamp': '2025-10-01 04:48:17.586218', 'step': 22242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:17.641502', 'step': 22242, 'epoch': 3} {'type': 'loss', 'content': 0.08068341016769409, 'timestamp': '2025-10-01 04:48:17.643981', 'step': 22243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:17.698171', 'step': 22243, 'epoch': 3} {'type': 'loss', 'content': 0.048806846141815186, 'timestamp': '2025-10-01 04:48:17.704304', 'step': 22244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:17.757867', 'step': 22244, 'epoch': 3} {'type': 'loss', 'content': 0.08944719284772873, 'timestamp': '2025-10-01 04:48:17.760583', 'step': 22245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:17.814077', 'step': 22245, 'epoch': 3} {'type': 'loss', 'content': 0.10748530179262161, 'timestamp': '2025-10-01 04:48:17.816508', 'step': 22246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:17.870998', 'step': 22246, 'epoch': 3} {'type': 'loss', 'content': 0.07600025832653046, 'timestamp': '2025-10-01 04:48:17.873477', 'step': 22247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:17.927561', 'step': 22247, 'epoch': 3} {'type': 'loss', 'content': 0.06080671772360802, 'timestamp': '2025-10-01 04:48:17.933473', 'step': 22248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:17.986885', 'step': 22248, 'epoch': 3} {'type': 'loss', 'content': 0.06393791735172272, 'timestamp': '2025-10-01 04:48:17.989446', 'step': 22249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:18.050495', 'step': 22249, 'epoch': 3} {'type': 'loss', 'content': 0.15798896551132202, 'timestamp': '2025-10-01 04:48:18.052715', 'step': 22250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:18.106819', 'step': 22250, 'epoch': 3} {'type': 'loss', 'content': 0.04422489553689957, 'timestamp': '2025-10-01 04:48:18.109577', 'step': 22251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:18.162820', 'step': 22251, 'epoch': 3} {'type': 'loss', 'content': 0.12106968462467194, 'timestamp': '2025-10-01 04:48:18.168795', 'step': 22252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:18.222574', 'step': 22252, 'epoch': 3} {'type': 'loss', 'content': 0.0967419445514679, 'timestamp': '2025-10-01 04:48:18.225004', 'step': 22253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:18.278797', 'step': 22253, 'epoch': 3} {'type': 'loss', 'content': 0.04360621050000191, 'timestamp': '2025-10-01 04:48:18.281310', 'step': 22254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:18.334875', 'step': 22254, 'epoch': 3} {'type': 'loss', 'content': 0.07010787725448608, 'timestamp': '2025-10-01 04:48:18.337408', 'step': 22255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:18.391400', 'step': 22255, 'epoch': 3} {'type': 'loss', 'content': 0.08896492421627045, 'timestamp': '2025-10-01 04:48:18.397218', 'step': 22256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:18.450107', 'step': 22256, 'epoch': 3} {'type': 'loss', 'content': 0.13383127748966217, 'timestamp': '2025-10-01 04:48:18.452271', 'step': 22257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:18.505122', 'step': 22257, 'epoch': 3} {'type': 'loss', 'content': 0.02979312837123871, 'timestamp': '2025-10-01 04:48:18.507354', 'step': 22258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:48:18.560504', 'step': 22258, 'epoch': 3} {'type': 'loss', 'content': 0.10512007027864456, 'timestamp': '2025-10-01 04:48:18.563359', 'step': 22259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:18.619448', 'step': 22259, 'epoch': 3} {'type': 'loss', 'content': 0.08940210938453674, 'timestamp': '2025-10-01 04:48:18.626185', 'step': 22260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:18.691589', 'step': 22260, 'epoch': 3} {'type': 'loss', 'content': 0.10404788702726364, 'timestamp': '2025-10-01 04:48:18.693961', 'step': 22261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:18.747762', 'step': 22261, 'epoch': 3} {'type': 'loss', 'content': 0.09900547564029694, 'timestamp': '2025-10-01 04:48:18.750097', 'step': 22262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:18.803326', 'step': 22262, 'epoch': 3} {'type': 'loss', 'content': 0.02896086499094963, 'timestamp': '2025-10-01 04:48:18.805596', 'step': 22263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:18.858584', 'step': 22263, 'epoch': 3} {'type': 'loss', 'content': 0.14591063559055328, 'timestamp': '2025-10-01 04:48:18.864373', 'step': 22264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:18.917381', 'step': 22264, 'epoch': 3} {'type': 'loss', 'content': 0.08706298470497131, 'timestamp': '2025-10-01 04:48:18.919413', 'step': 22265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:18.972490', 'step': 22265, 'epoch': 3} {'type': 'loss', 'content': 0.07431787997484207, 'timestamp': '2025-10-01 04:48:18.977842', 'step': 22266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:19.032209', 'step': 22266, 'epoch': 3} {'type': 'loss', 'content': 0.06197001039981842, 'timestamp': '2025-10-01 04:48:19.034364', 'step': 22267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:19.087172', 'step': 22267, 'epoch': 3} {'type': 'loss', 'content': 0.07457543909549713, 'timestamp': '2025-10-01 04:48:19.092979', 'step': 22268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:19.145632', 'step': 22268, 'epoch': 3} {'type': 'loss', 'content': 0.01532484870404005, 'timestamp': '2025-10-01 04:48:19.147637', 'step': 22269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:19.200620', 'step': 22269, 'epoch': 3} {'type': 'loss', 'content': 0.08665021508932114, 'timestamp': '2025-10-01 04:48:19.202792', 'step': 22270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:19.256003', 'step': 22270, 'epoch': 3} {'type': 'loss', 'content': 0.09182117134332657, 'timestamp': '2025-10-01 04:48:19.258163', 'step': 22271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:19.311413', 'step': 22271, 'epoch': 3} {'type': 'loss', 'content': 0.10988502949476242, 'timestamp': '2025-10-01 04:48:19.320326', 'step': 22272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:19.396468', 'step': 22272, 'epoch': 3} {'type': 'loss', 'content': 0.06323441863059998, 'timestamp': '2025-10-01 04:48:19.398740', 'step': 22273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:19.453845', 'step': 22273, 'epoch': 3} {'type': 'loss', 'content': 0.09138604253530502, 'timestamp': '2025-10-01 04:48:19.456063', 'step': 22274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:19.509107', 'step': 22274, 'epoch': 3} {'type': 'loss', 'content': 0.18142351508140564, 'timestamp': '2025-10-01 04:48:19.511417', 'step': 22275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:48:19.564134', 'step': 22275, 'epoch': 3} {'type': 'loss', 'content': 0.08279506117105484, 'timestamp': '2025-10-01 04:48:19.569798', 'step': 22276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:19.638017', 'step': 22276, 'epoch': 3} {'type': 'loss', 'content': 0.16051821410655975, 'timestamp': '2025-10-01 04:48:19.640265', 'step': 22277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:48:19.693473', 'step': 22277, 'epoch': 3} {'type': 'loss', 'content': 0.03703933209180832, 'timestamp': '2025-10-01 04:48:19.695537', 'step': 22278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:19.749123', 'step': 22278, 'epoch': 3} {'type': 'loss', 'content': 0.10795403271913528, 'timestamp': '2025-10-01 04:48:19.751408', 'step': 22279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:19.808303', 'step': 22279, 'epoch': 3} {'type': 'loss', 'content': 0.08909231424331665, 'timestamp': '2025-10-01 04:48:19.818996', 'step': 22280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:19.871723', 'step': 22280, 'epoch': 3} {'type': 'loss', 'content': 0.06622744351625443, 'timestamp': '2025-10-01 04:48:19.874015', 'step': 22281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:19.930238', 'step': 22281, 'epoch': 3} {'type': 'loss', 'content': 0.10459090024232864, 'timestamp': '2025-10-01 04:48:19.932553', 'step': 22282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:19.986268', 'step': 22282, 'epoch': 3} {'type': 'loss', 'content': 0.029707834124565125, 'timestamp': '2025-10-01 04:48:19.988780', 'step': 22283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:20.047000', 'step': 22283, 'epoch': 3} {'type': 'loss', 'content': 0.08662668615579605, 'timestamp': '2025-10-01 04:48:20.052710', 'step': 22284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:20.105733', 'step': 22284, 'epoch': 3} {'type': 'loss', 'content': 0.1290176808834076, 'timestamp': '2025-10-01 04:48:20.107906', 'step': 22285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:20.160759', 'step': 22285, 'epoch': 3} {'type': 'loss', 'content': 0.022311566397547722, 'timestamp': '2025-10-01 04:48:20.165915', 'step': 22286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:20.218685', 'step': 22286, 'epoch': 3} {'type': 'loss', 'content': 0.009720840491354465, 'timestamp': '2025-10-01 04:48:20.220816', 'step': 22287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:20.274276', 'step': 22287, 'epoch': 3} {'type': 'loss', 'content': 0.09627561271190643, 'timestamp': '2025-10-01 04:48:20.280028', 'step': 22288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:20.332963', 'step': 22288, 'epoch': 3} {'type': 'loss', 'content': 0.06110415980219841, 'timestamp': '2025-10-01 04:48:20.335342', 'step': 22289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:20.387997', 'step': 22289, 'epoch': 3} {'type': 'loss', 'content': 0.1034541055560112, 'timestamp': '2025-10-01 04:48:20.390224', 'step': 22290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-01 04:48:20.443295', 'step': 22290, 'epoch': 3} {'type': 'loss', 'content': 0.04214944317936897, 'timestamp': '2025-10-01 04:48:20.445782', 'step': 22291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:20.498981', 'step': 22291, 'epoch': 3} {'type': 'loss', 'content': 0.11507148295640945, 'timestamp': '2025-10-01 04:48:20.513967', 'step': 22292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:20.596298', 'step': 22292, 'epoch': 3} {'type': 'loss', 'content': 0.14260944724082947, 'timestamp': '2025-10-01 04:48:20.598687', 'step': 22293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:48:20.652543', 'step': 22293, 'epoch': 3} {'type': 'loss', 'content': 0.07792813330888748, 'timestamp': '2025-10-01 04:48:20.655385', 'step': 22294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:20.709404', 'step': 22294, 'epoch': 3} {'type': 'loss', 'content': 0.09181082248687744, 'timestamp': '2025-10-01 04:48:20.711645', 'step': 22295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:20.764611', 'step': 22295, 'epoch': 3} {'type': 'loss', 'content': 0.12034454941749573, 'timestamp': '2025-10-01 04:48:20.776083', 'step': 22296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:20.850761', 'step': 22296, 'epoch': 3} {'type': 'loss', 'content': 0.06802605837583542, 'timestamp': '2025-10-01 04:48:20.852978', 'step': 22297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:20.910179', 'step': 22297, 'epoch': 3} {'type': 'loss', 'content': 0.035764191299676895, 'timestamp': '2025-10-01 04:48:20.913937', 'step': 22298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:20.966918', 'step': 22298, 'epoch': 3} {'type': 'loss', 'content': 0.021217146888375282, 'timestamp': '2025-10-01 04:48:20.972950', 'step': 22299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:21.025401', 'step': 22299, 'epoch': 3} {'type': 'loss', 'content': 0.05689147859811783, 'timestamp': '2025-10-01 04:48:21.031159', 'step': 22300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:21.087667', 'step': 22300, 'epoch': 3} {'type': 'loss', 'content': 0.1918225735425949, 'timestamp': '2025-10-01 04:48:21.089890', 'step': 22301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:21.145172', 'step': 22301, 'epoch': 3} {'type': 'loss', 'content': 0.06596656143665314, 'timestamp': '2025-10-01 04:48:21.147369', 'step': 22302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:21.201900', 'step': 22302, 'epoch': 3} {'type': 'loss', 'content': 0.09199479967355728, 'timestamp': '2025-10-01 04:48:21.204428', 'step': 22303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:21.257897', 'step': 22303, 'epoch': 3} {'type': 'loss', 'content': 0.09024806320667267, 'timestamp': '2025-10-01 04:48:21.263545', 'step': 22304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:21.316730', 'step': 22304, 'epoch': 3} {'type': 'loss', 'content': 0.09823795408010483, 'timestamp': '2025-10-01 04:48:21.319352', 'step': 22305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:21.371925', 'step': 22305, 'epoch': 3} {'type': 'loss', 'content': 0.05698320269584656, 'timestamp': '2025-10-01 04:48:21.374006', 'step': 22306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:21.428364', 'step': 22306, 'epoch': 3} {'type': 'loss', 'content': 0.02640720084309578, 'timestamp': '2025-10-01 04:48:21.430524', 'step': 22307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:21.490750', 'step': 22307, 'epoch': 3} {'type': 'loss', 'content': 0.05321089178323746, 'timestamp': '2025-10-01 04:48:21.496480', 'step': 22308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:21.550856', 'step': 22308, 'epoch': 3} {'type': 'loss', 'content': 0.09941291809082031, 'timestamp': '2025-10-01 04:48:21.553077', 'step': 22309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:21.606919', 'step': 22309, 'epoch': 3} {'type': 'loss', 'content': 0.05699634179472923, 'timestamp': '2025-10-01 04:48:21.609149', 'step': 22310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:21.661948', 'step': 22310, 'epoch': 3} {'type': 'loss', 'content': 0.11475903540849686, 'timestamp': '2025-10-01 04:48:21.663881', 'step': 22311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:21.717226', 'step': 22311, 'epoch': 3} {'type': 'loss', 'content': 0.09268615394830704, 'timestamp': '2025-10-01 04:48:21.722896', 'step': 22312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:48:21.777003', 'step': 22312, 'epoch': 3} {'type': 'loss', 'content': 0.08351946622133255, 'timestamp': '2025-10-01 04:48:21.779197', 'step': 22313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:21.836126', 'step': 22313, 'epoch': 3} {'type': 'loss', 'content': 0.11432056128978729, 'timestamp': '2025-10-01 04:48:21.838847', 'step': 22314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:21.892178', 'step': 22314, 'epoch': 3} {'type': 'loss', 'content': 0.04762072488665581, 'timestamp': '2025-10-01 04:48:21.895958', 'step': 22315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:21.949005', 'step': 22315, 'epoch': 3} {'type': 'loss', 'content': 0.09463106095790863, 'timestamp': '2025-10-01 04:48:21.954888', 'step': 22316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:48:22.012248', 'step': 22316, 'epoch': 3} {'type': 'loss', 'content': 0.0997234508395195, 'timestamp': '2025-10-01 04:48:22.014716', 'step': 22317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:22.068487', 'step': 22317, 'epoch': 3} {'type': 'loss', 'content': 0.04690351337194443, 'timestamp': '2025-10-01 04:48:22.070846', 'step': 22318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:22.124726', 'step': 22318, 'epoch': 3} {'type': 'loss', 'content': 0.028432456776499748, 'timestamp': '2025-10-01 04:48:22.126888', 'step': 22319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:22.181159', 'step': 22319, 'epoch': 3} {'type': 'loss', 'content': 0.04275418072938919, 'timestamp': '2025-10-01 04:48:22.186973', 'step': 22320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:48:22.239474', 'step': 22320, 'epoch': 3} {'type': 'loss', 'content': 0.13636353611946106, 'timestamp': '2025-10-01 04:48:22.246842', 'step': 22321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:22.300648', 'step': 22321, 'epoch': 3} {'type': 'loss', 'content': 0.10405683517456055, 'timestamp': '2025-10-01 04:48:22.306205', 'step': 22322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:22.359827', 'step': 22322, 'epoch': 3} {'type': 'loss', 'content': 0.1079045832157135, 'timestamp': '2025-10-01 04:48:22.362372', 'step': 22323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:22.420140', 'step': 22323, 'epoch': 3} {'type': 'loss', 'content': 0.09430350363254547, 'timestamp': '2025-10-01 04:48:22.425834', 'step': 22324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:48:22.478752', 'step': 22324, 'epoch': 3} {'type': 'loss', 'content': 0.05666263774037361, 'timestamp': '2025-10-01 04:48:22.480920', 'step': 22325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:22.549914', 'step': 22325, 'epoch': 3} {'type': 'loss', 'content': 0.08729633688926697, 'timestamp': '2025-10-01 04:48:22.552067', 'step': 22326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:22.605369', 'step': 22326, 'epoch': 3} {'type': 'loss', 'content': 0.08348449319601059, 'timestamp': '2025-10-01 04:48:22.607609', 'step': 22327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:22.660556', 'step': 22327, 'epoch': 3} {'type': 'loss', 'content': 0.2175760567188263, 'timestamp': '2025-10-01 04:48:22.666118', 'step': 22328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:22.719592', 'step': 22328, 'epoch': 3} {'type': 'loss', 'content': 0.1556759476661682, 'timestamp': '2025-10-01 04:48:22.721687', 'step': 22329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:22.774891', 'step': 22329, 'epoch': 3} {'type': 'loss', 'content': 0.05280062183737755, 'timestamp': '2025-10-01 04:48:22.777461', 'step': 22330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:22.830896', 'step': 22330, 'epoch': 3} {'type': 'loss', 'content': 0.021491996943950653, 'timestamp': '2025-10-01 04:48:22.833122', 'step': 22331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:22.886432', 'step': 22331, 'epoch': 3} {'type': 'loss', 'content': 0.06856874376535416, 'timestamp': '2025-10-01 04:48:22.892871', 'step': 22332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:48:22.945684', 'step': 22332, 'epoch': 3} {'type': 'loss', 'content': 0.08051177114248276, 'timestamp': '2025-10-01 04:48:22.948307', 'step': 22333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:23.001631', 'step': 22333, 'epoch': 3} {'type': 'loss', 'content': 0.04445747286081314, 'timestamp': '2025-10-01 04:48:23.005637', 'step': 22334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:23.059779', 'step': 22334, 'epoch': 3} {'type': 'loss', 'content': 0.08804876357316971, 'timestamp': '2025-10-01 04:48:23.062042', 'step': 22335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:23.117967', 'step': 22335, 'epoch': 3} {'type': 'loss', 'content': 0.030844882130622864, 'timestamp': '2025-10-01 04:48:23.124175', 'step': 22336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:48:23.182543', 'step': 22336, 'epoch': 3} {'type': 'loss', 'content': 0.10696463286876678, 'timestamp': '2025-10-01 04:48:23.190626', 'step': 22337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:23.250864', 'step': 22337, 'epoch': 3} {'type': 'loss', 'content': 0.027954692021012306, 'timestamp': '2025-10-01 04:48:23.253603', 'step': 22338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:23.307339', 'step': 22338, 'epoch': 3} {'type': 'loss', 'content': 0.10210604965686798, 'timestamp': '2025-10-01 04:48:23.309840', 'step': 22339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:23.364233', 'step': 22339, 'epoch': 3} {'type': 'loss', 'content': 0.10469401627779007, 'timestamp': '2025-10-01 04:48:23.369943', 'step': 22340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:48:23.423292', 'step': 22340, 'epoch': 3} {'type': 'loss', 'content': 0.012526164762675762, 'timestamp': '2025-10-01 04:48:23.425482', 'step': 22341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:23.478787', 'step': 22341, 'epoch': 3} {'type': 'loss', 'content': 0.09181161969900131, 'timestamp': '2025-10-01 04:48:23.481339', 'step': 22342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:23.534799', 'step': 22342, 'epoch': 3} {'type': 'loss', 'content': 0.09196638315916061, 'timestamp': '2025-10-01 04:48:23.537000', 'step': 22343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:23.590880', 'step': 22343, 'epoch': 3} {'type': 'loss', 'content': 0.05246499180793762, 'timestamp': '2025-10-01 04:48:23.596646', 'step': 22344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:23.652678', 'step': 22344, 'epoch': 3} {'type': 'loss', 'content': 0.1253148466348648, 'timestamp': '2025-10-01 04:48:23.655077', 'step': 22345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:23.708301', 'step': 22345, 'epoch': 3} {'type': 'loss', 'content': 0.08246442675590515, 'timestamp': '2025-10-01 04:48:23.710600', 'step': 22346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:23.764727', 'step': 22346, 'epoch': 3} {'type': 'loss', 'content': 0.06018922105431557, 'timestamp': '2025-10-01 04:48:23.766902', 'step': 22347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:23.821952', 'step': 22347, 'epoch': 3} {'type': 'loss', 'content': 0.05052299425005913, 'timestamp': '2025-10-01 04:48:23.827857', 'step': 22348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:48:23.881139', 'step': 22348, 'epoch': 3} {'type': 'loss', 'content': 0.1157258078455925, 'timestamp': '2025-10-01 04:48:23.884596', 'step': 22349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:23.938402', 'step': 22349, 'epoch': 3} {'type': 'loss', 'content': 0.06601542979478836, 'timestamp': '2025-10-01 04:48:23.940386', 'step': 22350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:23.993573', 'step': 22350, 'epoch': 3} {'type': 'loss', 'content': 0.10263852030038834, 'timestamp': '2025-10-01 04:48:23.995961', 'step': 22351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:48:24.055004', 'step': 22351, 'epoch': 3} {'type': 'loss', 'content': 0.1301538050174713, 'timestamp': '2025-10-01 04:48:24.060679', 'step': 22352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:24.114023', 'step': 22352, 'epoch': 3} {'type': 'loss', 'content': 0.039614588022232056, 'timestamp': '2025-10-01 04:48:24.116168', 'step': 22353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-01 04:48:24.169122', 'step': 22353, 'epoch': 3} {'type': 'loss', 'content': 0.04464934766292572, 'timestamp': '2025-10-01 04:48:24.172674', 'step': 22354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:24.226263', 'step': 22354, 'epoch': 3} {'type': 'loss', 'content': 0.09344606101512909, 'timestamp': '2025-10-01 04:48:24.228569', 'step': 22355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:24.281580', 'step': 22355, 'epoch': 3} {'type': 'loss', 'content': 0.08424725383520126, 'timestamp': '2025-10-01 04:48:24.287130', 'step': 22356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:24.339988', 'step': 22356, 'epoch': 3} {'type': 'loss', 'content': 0.11193211376667023, 'timestamp': '2025-10-01 04:48:24.342215', 'step': 22357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:24.395277', 'step': 22357, 'epoch': 3} {'type': 'loss', 'content': 0.007520494516938925, 'timestamp': '2025-10-01 04:48:24.397429', 'step': 22358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:24.450436', 'step': 22358, 'epoch': 3} {'type': 'loss', 'content': 0.04548973962664604, 'timestamp': '2025-10-01 04:48:24.452732', 'step': 22359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:24.507243', 'step': 22359, 'epoch': 3} {'type': 'loss', 'content': 0.08745108544826508, 'timestamp': '2025-10-01 04:48:24.512997', 'step': 22360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:24.565909', 'step': 22360, 'epoch': 3} {'type': 'loss', 'content': 0.10726332664489746, 'timestamp': '2025-10-01 04:48:24.574410', 'step': 22361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:24.628108', 'step': 22361, 'epoch': 3} {'type': 'loss', 'content': 0.08523865044116974, 'timestamp': '2025-10-01 04:48:24.630308', 'step': 22362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-01 04:48:24.686597', 'step': 22362, 'epoch': 3} {'type': 'loss', 'content': 0.1363888382911682, 'timestamp': '2025-10-01 04:48:24.688896', 'step': 22363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:24.750552', 'step': 22363, 'epoch': 3} {'type': 'loss', 'content': 0.03375003859400749, 'timestamp': '2025-10-01 04:48:24.756929', 'step': 22364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:24.810350', 'step': 22364, 'epoch': 3} {'type': 'loss', 'content': 0.0474909283220768, 'timestamp': '2025-10-01 04:48:24.813057', 'step': 22365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:24.866569', 'step': 22365, 'epoch': 3} {'type': 'loss', 'content': 0.080780528485775, 'timestamp': '2025-10-01 04:48:24.869033', 'step': 22366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:24.922928', 'step': 22366, 'epoch': 3} {'type': 'loss', 'content': 0.1645832508802414, 'timestamp': '2025-10-01 04:48:24.925162', 'step': 22367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:24.978325', 'step': 22367, 'epoch': 3} {'type': 'loss', 'content': 0.14733092486858368, 'timestamp': '2025-10-01 04:48:24.984479', 'step': 22368, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-01 04:48:37.899980', 'step': 22368, 'epoch': 3} {'type': 'pplx', 'content': 7285.699507107303, 'timestamp': '2025-10-01 04:48:37.903040', 'step': 22368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-01 04:48:37.957122', 'step': 22368, 'epoch': 3} {'type': 'loss', 'content': 0.10757046937942505, 'timestamp': '2025-10-01 04:48:37.962058', 'step': 22369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:38.016286', 'step': 22369, 'epoch': 3} {'type': 'loss', 'content': 0.06423012912273407, 'timestamp': '2025-10-01 04:48:38.018374', 'step': 22370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:38.072301', 'step': 22370, 'epoch': 3} {'type': 'loss', 'content': 0.08217896521091461, 'timestamp': '2025-10-01 04:48:38.075563', 'step': 22371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:38.136491', 'step': 22371, 'epoch': 3} {'type': 'loss', 'content': 0.019379910081624985, 'timestamp': '2025-10-01 04:48:38.142829', 'step': 22372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:38.200951', 'step': 22372, 'epoch': 3} {'type': 'loss', 'content': 0.06348264962434769, 'timestamp': '2025-10-01 04:48:38.203094', 'step': 22373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:38.257274', 'step': 22373, 'epoch': 3} {'type': 'loss', 'content': 0.044467683881521225, 'timestamp': '2025-10-01 04:48:38.259582', 'step': 22374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-01 04:48:38.319130', 'step': 22374, 'epoch': 3} {'type': 'loss', 'content': 0.20950984954833984, 'timestamp': '2025-10-01 04:48:38.321269', 'step': 22375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-01 04:48:38.375120', 'step': 22375, 'epoch': 3} {'type': 'loss', 'content': 0.08376842737197876, 'timestamp': '2025-10-01 04:48:38.381128', 'step': 22376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:38.440007', 'step': 22376, 'epoch': 3} {'type': 'loss', 'content': 0.07374852895736694, 'timestamp': '2025-10-01 04:48:38.445185', 'step': 22377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-01 04:48:38.499050', 'step': 22377, 'epoch': 3} {'type': 'loss', 'content': 0.06195471063256264, 'timestamp': '2025-10-01 04:48:38.501331', 'step': 22378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-01 04:48:38.554926', 'step': 22378, 'epoch': 3} {'type': 'loss', 'content': 0.019898351281881332, 'timestamp': '2025-10-01 04:48:38.557344', 'step': 22379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [1, 208], 'flops': 1040006410960.0}, 'timestamp': '2025-10-01 04:48:38.612648', 'step': 22379, 'epoch': 3} {'type': 'loss', 'content': 0.03567060828208923, 'timestamp': '2025-10-01 04:48:38.619024', 'step': 22380, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-01 04:48:51.703352', 'step': 22380, 'epoch': 3} {'type': 'pplx', 'content': 7435.135248774508, 'timestamp': '2025-10-01 04:48:51.706479', 'step': 22380, 'epoch': 3} {'type': 'best_pplx', 'content': 7285.699507107303, 'timestamp': '2025-10-01 04:48:51.707988', 'step': 22380, 'epoch': 3} {'type': 'best_step', 'content': 22368, 'timestamp': '2025-10-01 04:48:51.709240', 'step': 22380, 'epoch': 3} {'type': 'total_pplx_flops', 'content': 50797850157497600, 'timestamp': '2025-10-01 04:48:51.710420', 'step': 22380, 'epoch': 3} {'type': 'total_train_flops', 'content': 5.161879486776446e+16, 'timestamp': '2025-10-01 04:48:51.712276', 'step': 22380, 'epoch': 3}